From c5522eba3e18343052666f2756a2a2451dd87f58 Mon Sep 17 00:00:00 2001 From: Jon Harmon Date: Thu, 5 Sep 2024 07:53:23 -0500 Subject: [PATCH 1/2] Use base pipe. Closes #75. I also updated the GitHub Actions R-CMD-check workflow to test all the way back to R 4.1 (for now), to make sure we don't use the base pipe in a way that wasn't supported when it was first introduced. --- .github/workflows/R-CMD-check.yaml | 2 ++ DESCRIPTION | 1 - NAMESPACE | 1 - NEWS.md | 5 +++-- R/data.R | 16 ++++++++-------- R/gutenberg_download.R | 2 +- R/gutenberg_works.R | 18 +++++++++--------- R/gutenbergr-package.R | 1 - README.Rmd | 6 +++--- README.md | 6 +++--- man/gutenberg_download.Rd | 2 +- man/gutenberg_metadata.Rd | 6 +++--- man/gutenberg_subjects.Rd | 10 +++++----- man/gutenberg_works.Rd | 8 ++++---- vignettes/intro.Rmd | 14 +++++++------- 15 files changed, 49 insertions(+), 49 deletions(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 21ccc7e..00ed8aa 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -25,6 +25,8 @@ jobs: - {os: windows-latest, r: 'release'} - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} - {os: ubuntu-latest, r: 'oldrel-1'} + - {os: ubuntu-latest, r: 'oldrel-2'} + - {os: ubuntu-latest, r: 'oldrel-3'} env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} diff --git a/DESCRIPTION b/DESCRIPTION index 4a985a5..6fbe025 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -22,7 +22,6 @@ Imports: cli, dplyr, glue, - magrittr, purrr, readr, rlang, diff --git a/NAMESPACE b/NAMESPACE index e33b5a7..75c8696 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -8,5 +8,4 @@ export(gutenberg_works) importFrom(dplyr,count) importFrom(dplyr,distinct) importFrom(dplyr,filter) -importFrom(magrittr,"%>%") importFrom(rlang,"%||%") diff --git a/NEWS.md b/NEWS.md index 5851200..4c6b264 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,8 +1,9 @@ # gutenbergr (development version) -* `gutenberg_download()` tries the `.txt` version of files when the `.zip` is unavailable 
(@jrdnbradford, #55). +* `gutenberg_download()` tries the `.txt` version of files when the `.zip` is unavailable (@jrdnbradford, #55, #70). * New function `gutenberg_get_all_mirrors()` retrieves all mirror data (@jrdnbradford, #58). -* The package infrastructure has been updated to make the package more robust and maintainable. +* The package infrastructure has been updated to make the package more robust and maintainable (#60, #64, #69). +* We now use the base R pipe (`|>`) in code and examples, not the magrittr pipe (`%>%`) (@jonthegeek, #75). # gutenbergr 0.2.4 diff --git a/R/data.R b/R/data.R index de9e057..6343e00 100644 --- a/R/data.R +++ b/R/data.R @@ -37,19 +37,19 @@ #' #' gutenberg_metadata #' -#' gutenberg_metadata %>% +#' gutenberg_metadata |> #' count(author, sort = TRUE) #' #' # look for Shakespeare, excluding collections (containing "Works") and #' # translations -#' shakespeare_metadata <- gutenberg_metadata %>% +#' shakespeare_metadata <- gutenberg_metadata |> #' filter( #' author == "Shakespeare, William", #' language == "en", #' !str_detect(title, "Works"), #' has_text, #' !str_detect(rights, "Copyright") -#' ) %>% +#' ) |> #' distinct(title) #' #' \donttest{ @@ -101,17 +101,17 @@ #' library(dplyr) #' library(stringr) #' -#' gutenberg_subjects %>% -#' filter(subject_type == "lcsh") %>% +#' gutenberg_subjects |> +#' filter(subject_type == "lcsh") |> #' count(subject, sort = TRUE) #' -#' sherlock_holmes_subjects <- gutenberg_subjects %>% +#' sherlock_holmes_subjects <- gutenberg_subjects |> #' filter(str_detect(subject, "Holmes, Sherlock")) #' #' sherlock_holmes_subjects #' -#' sherlock_holmes_metadata <- gutenberg_works() %>% -#' filter(author == "Doyle, Arthur Conan") %>% +#' sherlock_holmes_metadata <- gutenberg_works() |> +#' filter(author == "Doyle, Arthur Conan") |> #' semi_join(sherlock_holmes_subjects, by = "gutenberg_id") #' #' sherlock_holmes_metadata diff --git a/R/gutenberg_download.R b/R/gutenberg_download.R index 9ef2699..57428b0 
100644 --- a/R/gutenberg_download.R +++ b/R/gutenberg_download.R @@ -35,7 +35,7 @@ #' dplyr::count(books, title) #' #' # download all books from Jane Austen -#' austen <- gutenberg_works(author == "Austen, Jane") %>% +#' austen <- gutenberg_works(author == "Austen, Jane") |> #' gutenberg_download(meta_fields = "title") #' austen #' dplyr::count(austen, title) diff --git a/R/gutenberg_works.R b/R/gutenberg_works.R index a23acbb..563307e 100644 --- a/R/gutenberg_works.R +++ b/R/gutenberg_works.R @@ -50,16 +50,16 @@ #' #' # language specifications #' -#' gutenberg_works(languages = "es") %>% +#' gutenberg_works(languages = "es") |> #' count(language, sort = TRUE) #' -#' gutenberg_works(languages = c("en", "es")) %>% +#' gutenberg_works(languages = c("en", "es")) |> #' count(language, sort = TRUE) #' -#' gutenberg_works(languages = c("en", "es"), all_languages = TRUE) %>% +#' gutenberg_works(languages = c("en", "es"), all_languages = TRUE) |> #' count(language, sort = TRUE) #' -#' gutenberg_works(languages = c("en", "es"), only_languages = FALSE) %>% +#' gutenberg_works(languages = c("en", "es"), only_languages = FALSE) |> #' count(language, sort = TRUE) #' } #' @export @@ -85,20 +85,20 @@ gutenberg_works <- function(..., languages = "en", ret <- filter(gutenberg_metadata, ...) 
if (!is.null(languages)) { - lang_filt <- gutenberg_languages %>% - filter(language %in% languages) %>% + lang_filt <- gutenberg_languages |> + filter(language %in% languages) |> count(gutenberg_id, total_languages) if (all_languages) { - lang_filt <- lang_filt %>% + lang_filt <- lang_filt |> filter(n >= length(languages)) } if (only_languages) { - lang_filt <- lang_filt %>% + lang_filt <- lang_filt |> filter(total_languages <= n) } - ret <- ret %>% + ret <- ret |> filter(gutenberg_id %in% lang_filt$gutenberg_id) } diff --git a/R/gutenbergr-package.R b/R/gutenbergr-package.R index 46f156e..ebf88e3 100644 --- a/R/gutenbergr-package.R +++ b/R/gutenbergr-package.R @@ -5,7 +5,6 @@ #' @importFrom dplyr count #' @importFrom dplyr distinct #' @importFrom dplyr filter -#' @importFrom magrittr %>% #' @importFrom rlang %||% ## usethis namespace: end NULL diff --git a/README.Rmd b/README.Rmd index cd6c33e..f06e325 100644 --- a/README.Rmd +++ b/README.Rmd @@ -66,7 +66,7 @@ Suppose we wanted to download Emily Bronte's "Wuthering Heights." We could find library(dplyr) library(gutenbergr) -gutenberg_works() %>% +gutenberg_works() |> filter(title == "Wuthering Heights") # or just: @@ -87,14 +87,14 @@ wuthering_heights books <- gutenberg_download(c(768, 1260), meta_fields = "title") books -books %>% +books |> count(title) ``` It can also take the output of `gutenberg_works` directly. 
For example, we could get the text of all Aristotle's works, each annotated with both `gutenberg_id` and `title`, using: ```{r} -aristotle_books <- gutenberg_works(author == "Aristotle") %>% +aristotle_books <- gutenberg_works(author == "Aristotle") |> gutenberg_download(meta_fields = "title") aristotle_books diff --git a/README.md b/README.md index 33091ca..d2784af 100644 --- a/README.md +++ b/README.md @@ -71,7 +71,7 @@ could find the book’s ID by filtering: library(dplyr) library(gutenbergr) -gutenberg_works() %>% +gutenberg_works() |> filter(title == "Wuthering Heights") #> # A tibble: 1 × 8 #> gutenberg_id title author gutenberg_author_id language @@ -137,7 +137,7 @@ books #> 10 768 "" Wuthering Heights #> # ℹ 33,333 more rows -books %>% +books |> count(title) #> # A tibble: 2 × 2 #> title n @@ -151,7 +151,7 @@ we could get the text of all Aristotle’s works, each annotated with both `gutenberg_id` and `title`, using: ``` r -aristotle_books <- gutenberg_works(author == "Aristotle") %>% +aristotle_books <- gutenberg_works(author == "Aristotle") |> gutenberg_download(meta_fields = "title") aristotle_books diff --git a/man/gutenberg_download.Rd b/man/gutenberg_download.Rd index b4ecd09..b965b1e 100644 --- a/man/gutenberg_download.Rd +++ b/man/gutenberg_download.Rd @@ -55,7 +55,7 @@ using \code{\link[=gutenberg_works]{gutenberg_works()}} or the \link{gutenberg_m dplyr::count(books, title) # download all books from Jane Austen - austen <- gutenberg_works(author == "Austen, Jane") \%>\% + austen <- gutenberg_works(author == "Austen, Jane") |> gutenberg_download(meta_fields = "title") austen dplyr::count(austen, title) diff --git a/man/gutenberg_metadata.Rd b/man/gutenberg_metadata.Rd index 5973906..c39ae68 100644 --- a/man/gutenberg_metadata.Rd +++ b/man/gutenberg_metadata.Rd @@ -48,19 +48,19 @@ library(stringr) gutenberg_metadata -gutenberg_metadata \%>\% +gutenberg_metadata |> count(author, sort = TRUE) # look for Shakespeare, excluding collections (containing 
"Works") and # translations -shakespeare_metadata <- gutenberg_metadata \%>\% +shakespeare_metadata <- gutenberg_metadata |> filter( author == "Shakespeare, William", language == "en", !str_detect(title, "Works"), has_text, !str_detect(rights, "Copyright") - ) \%>\% + ) |> distinct(title) \donttest{ diff --git a/man/gutenberg_subjects.Rd b/man/gutenberg_subjects.Rd index 4834ad8..4381914 100644 --- a/man/gutenberg_subjects.Rd +++ b/man/gutenberg_subjects.Rd @@ -38,17 +38,17 @@ run \code{attr(gutenberg_subjects, "date_updated")}. library(dplyr) library(stringr) -gutenberg_subjects \%>\% - filter(subject_type == "lcsh") \%>\% +gutenberg_subjects |> + filter(subject_type == "lcsh") |> count(subject, sort = TRUE) -sherlock_holmes_subjects <- gutenberg_subjects \%>\% +sherlock_holmes_subjects <- gutenberg_subjects |> filter(str_detect(subject, "Holmes, Sherlock")) sherlock_holmes_subjects -sherlock_holmes_metadata <- gutenberg_works() \%>\% - filter(author == "Doyle, Arthur Conan") \%>\% +sherlock_holmes_metadata <- gutenberg_works() |> + filter(author == "Doyle, Arthur Conan") |> semi_join(sherlock_holmes_subjects, by = "gutenberg_id") sherlock_holmes_metadata diff --git a/man/gutenberg_works.Rd b/man/gutenberg_works.Rd index 00a55c3..873427f 100644 --- a/man/gutenberg_works.Rd +++ b/man/gutenberg_works.Rd @@ -75,16 +75,16 @@ gutenberg_works(author == "Shakespeare, William") # language specifications -gutenberg_works(languages = "es") \%>\% +gutenberg_works(languages = "es") |> count(language, sort = TRUE) -gutenberg_works(languages = c("en", "es")) \%>\% +gutenberg_works(languages = c("en", "es")) |> count(language, sort = TRUE) -gutenberg_works(languages = c("en", "es"), all_languages = TRUE) \%>\% +gutenberg_works(languages = c("en", "es"), all_languages = TRUE) |> count(language, sort = TRUE) -gutenberg_works(languages = c("en", "es"), only_languages = FALSE) \%>\% +gutenberg_works(languages = c("en", "es"), only_languages = FALSE) |> count(language, sort = TRUE) } 
\dontshow{\}) # examplesIf} diff --git a/vignettes/intro.Rmd b/vignettes/intro.Rmd index 7131a41..ba38b75 100644 --- a/vignettes/intro.Rmd +++ b/vignettes/intro.Rmd @@ -46,7 +46,7 @@ gutenberg_metadata For example, you could find the Gutenberg ID(s) of Jane Austen's _Persuasion_ by doing: ```{r filter} -gutenberg_metadata %>% +gutenberg_metadata |> filter(title == "Persuasion") ``` @@ -107,7 +107,7 @@ books Notice that the `meta_fields` argument allows us to add one or more additional fields from the `gutenberg_metadata` to the downloaded text, such as title or author. ```{r count books} -books %>% +books |> count(title) ``` @@ -122,10 +122,10 @@ gutenberg_subjects This is useful for extracting texts from a particular topic or genre, such as detective stories, or a particular character, such as Sherlock Holmes. The `gutenberg_id` column can then be used to download these texts or to link with other metadata. ```{r filter subjects} -gutenberg_subjects %>% +gutenberg_subjects |> filter(subject == "Detective and mystery stories") -gutenberg_subjects %>% +gutenberg_subjects |> filter(grepl("Holmes, Sherlock", subject)) ``` @@ -140,13 +140,13 @@ gutenberg_authors What's next after retrieving a book's text? Well, having the book as a data frame is especially useful for working with the [tidytext](https://github.com/juliasilge/tidytext) package for text analysis. ```{r tidytext} -words <- books %>% +words <- books |> unnest_tokens(word, text) words -word_counts <- words %>% - anti_join(stop_words, by = "word") %>% +word_counts <- words |> + anti_join(stop_words, by = "word") |> count(title, word, sort = TRUE) word_counts From 550fdfdc9ce88cddf0799bba408a00b4aa7ce120 Mon Sep 17 00:00:00 2001 From: Jon Harmon Date: Thu, 5 Sep 2024 07:58:11 -0500 Subject: [PATCH 2/2] Namespace all dplyr function calls. 
--- NAMESPACE | 3 --- R/gutenberg_works.R | 18 +++++++++--------- R/gutenbergr-package.R | 3 --- 3 files changed, 9 insertions(+), 15 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 75c8696..d21b00b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,7 +5,4 @@ export(gutenberg_get_all_mirrors) export(gutenberg_get_mirror) export(gutenberg_strip) export(gutenberg_works) -importFrom(dplyr,count) -importFrom(dplyr,distinct) -importFrom(dplyr,filter) importFrom(rlang,"%||%") diff --git a/R/gutenberg_works.R b/R/gutenberg_works.R index 563307e..24b376e 100644 --- a/R/gutenberg_works.R +++ b/R/gutenberg_works.R @@ -82,37 +82,37 @@ gutenberg_works <- function(..., languages = "en", ) } ) - ret <- filter(gutenberg_metadata, ...) + ret <- dplyr::filter(gutenberg_metadata, ...) if (!is.null(languages)) { lang_filt <- gutenberg_languages |> - filter(language %in% languages) |> - count(gutenberg_id, total_languages) + dplyr::filter(language %in% languages) |> + dplyr::count(gutenberg_id, total_languages) if (all_languages) { lang_filt <- lang_filt |> - filter(n >= length(languages)) + dplyr::filter(n >= length(languages)) } if (only_languages) { lang_filt <- lang_filt |> - filter(total_languages <= n) + dplyr::filter(total_languages <= n) } ret <- ret |> - filter(gutenberg_id %in% lang_filt$gutenberg_id) + dplyr::filter(gutenberg_id %in% lang_filt$gutenberg_id) } if (!is.null(rights)) { .rights <- rights - ret <- filter(ret, rights %in% .rights) + ret <- dplyr::filter(ret, rights %in% .rights) } if (only_text) { - ret <- filter(ret, has_text) + ret <- dplyr::filter(ret, has_text) } if (distinct) { - ret <- distinct(ret, title, gutenberg_author_id, .keep_all = TRUE) + ret <- dplyr::distinct(ret, title, gutenberg_author_id, .keep_all = TRUE) # in older versions of dplyr, distinct_ didn't need .keep_all if (any(colnames(ret) == ".keep_all")) { ret$.keep_all <- NULL # nocov diff --git a/R/gutenbergr-package.R b/R/gutenbergr-package.R index ebf88e3..2584f02 100644 --- 
a/R/gutenbergr-package.R +++ b/R/gutenbergr-package.R @@ -2,9 +2,6 @@ "_PACKAGE" ## usethis namespace: start -#' @importFrom dplyr count -#' @importFrom dplyr distinct -#' @importFrom dplyr filter #' @importFrom rlang %||% ## usethis namespace: end NULL