Skip to content

Commit

Permalink
fix seed and remove interactive
Browse files Browse the repository at this point in the history
  • Loading branch information
Melkiades committed Jul 18, 2024
1 parent 4c61f61 commit b164d74
Show file tree
Hide file tree
Showing 8 changed files with 30 additions and 14 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ Suggests:
rmarkdown (>= 2.23),
testthat (>= 3.0.4),
withr (>= 2.0.0),
ggplot2
ggplot2,
Hmisc
VignetteBuilder:
knitr
RdMacros:
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# random.cdisc.data 0.3.15.9004
### Enhancements
* Added `reduce_num_levels_in_df` for dimensionality control in realistic data to maximize information content while limiting the number of levels in categorical variables.
* Added vignette about data handling called `data_pre_processing.Rmd`.

### Miscellaneous
* Renamed `var_relabel` into `rcd_var_relabel` and copied new functionalities to avoid namespace conflict with `formatters` package.
Expand Down
8 changes: 5 additions & 3 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#' @param x (`character` or `factor`)\cr If character vector then it is also used
#' as levels of the returned factor. If factor then the levels are used as the new levels.
#' @param N (`numeric(1)`)\cr Number of items to choose.
#' @param random_seed (`numeric(1)`)\cr Seed for random number generation. Default is `Sys.time()`.
#' @param random_seed (`numeric(1)` or `NULL`)\cr Seed for random number generation.
#' @param ... Additional arguments to be passed to `sample`.
#'
#' @return A factor of length `N`.
Expand All @@ -16,10 +16,12 @@
#' sample_fct(iris$Species, 10)
#'
#' @export
sample_fct <- function(x, N, random_seed = Sys.time(), ...) { # nolint
sample_fct <- function(x, N, random_seed = NULL, ...) { # nolint
checkmate::assert_number(N)

set.seed(random_seed)
if (!is.null(random_seed)) {
set.seed(random_seed)
}

factor(sample(x, N, replace = TRUE, ...), levels = if (is.factor(x)) levels(x) else x)
}
Expand Down
14 changes: 9 additions & 5 deletions R/utils_dim_control_and_checks.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#' @param add_specific_value (`character`)\cr specific values to keep.
#' @param keep_spec_rows (`integer`)\cr specific rows to keep.
#' @param explorative (`logical(1)`)\cr if `TRUE`, a plot with the frequency distribution of levels is shown.
#' @param verbose (`logical(1)`)\cr if `TRUE`, messages are printed.
#'
#' @details
#' If necessary, a number of additional rare values can be picked from the least represented levels
Expand Down Expand Up @@ -48,7 +49,8 @@ reduce_num_levels_in_df <- function(dt,
num_of_rare_values = 0,
add_specific_value = NULL,
keep_spec_rows = NULL,
explorative = FALSE) {
explorative = FALSE,
verbose = TRUE) {
checkmate::assert_number(p_to_keep, lower = 0, upper = 1)
checkmate::assert_data_frame(dt)
checkmate::assert_string(variable)
Expand All @@ -59,6 +61,7 @@ reduce_num_levels_in_df <- function(dt,
lower = 1, upper = nrow(dt), unique = TRUE
)
checkmate::assert_flag(explorative)
checkmate::assert_flag(verbose)
cur_vec <- dt[[variable]]

if (is.factor(cur_vec)) {
Expand All @@ -68,7 +71,7 @@ reduce_num_levels_in_df <- function(dt,
lev_freq <- sort(table(cur_vec), decreasing = TRUE)

# Explorative plot
if (explorative && interactive()) {
if (explorative) {
require(ggplot2)
plot_tbl <- tibble(
level = names(lev_freq),
Expand Down Expand Up @@ -138,8 +141,6 @@ reduce_num_levels_in_df <- function(dt,
annotate("text", x = annot_x, y = annot_y, label = annot_label, vjust = 0, hjust = 0)
}

print(gg)

# Effective calculations
} else {
checkmate::assert_int(num_of_rare_values, lower = 0, upper = length(lev_freq))
Expand Down Expand Up @@ -179,7 +180,7 @@ reduce_num_levels_in_df <- function(dt,
}
}

if (interactive()) {
if (verbose) {
if (length(keep_spec_rows) > 0) {
core_msg <- paste0(
length(lev_to_keep), " + ", length(keep_spec_rows), " (from keep_spec_rows) levels out of ",
Expand All @@ -206,6 +207,9 @@ reduce_num_levels_in_df <- function(dt,
out <- out %>%
mutate(!!sym(variable) := factor(!!sym(variable)))

if (explorative) {
return(gg)
}
invisible(out)
}
}
5 changes: 4 additions & 1 deletion man/reduce_num_levels_in_df.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/sample_fct.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions tests/testthat/test-utils_dim_control_and_checks.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ test_that("reduce_num_levels_in_df(num_max_values, num_of_rare_values) works", {
})

test_that("reduce_num_levels_in_df(add_specific_value) works", {
cadae_tmp <- cadae %>% mutate(AEDECOD = as.character(AEDECOD))
cadae_tmp <- random.cdisc.data::cadae %>% mutate(AEDECOD = as.character(AEDECOD))
cadae_tmp$AEDECOD[1] <- "an_outlier"
rlang::with_interactive(
expect_message(
Expand All @@ -58,7 +58,7 @@ test_that("reduce_num_levels_in_df(add_specific_value) works", {
})

test_that("reduce_num_levels_in_df(add_specific_value) works", {
cadae_tmp <- cadae %>% mutate(AEDECOD = as.character(AEDECOD))
cadae_tmp <- random.cdisc.data::cadae %>% mutate(AEDECOD = as.character(AEDECOD))
cadae_tmp$AEDECOD[1] <- "an_outlier"
rlang::with_interactive(
expect_message(
Expand Down
5 changes: 5 additions & 0 deletions vignettes/data_pre_processing.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,12 @@ For our purposes, we only use `random.cdisc.data::cadae` as example data because

```{r}
library(random.cdisc.data)
data("cadae")
```

```{r}
# desc_ae <- Hmisc::describe(cadae)
# desc_ae$SEX
```

## Reducing number of levels per variable
Expand Down

0 comments on commit b164d74

Please sign in to comment.