Skip to content

Commit

Permalink
Allow formerly broken mirror (#84)
Browse files Browse the repository at this point in the history
* Allow formerly broken mirror (#82)

* Update docs

* Style & build docs.

* Tweak tests and messaging.

---------

Co-authored-by: Jon Harmon <[email protected]>
  • Loading branch information
jrdnbradford and jonthegeek authored Oct 30, 2024
1 parent c946d59 commit 821ea47
Show file tree
Hide file tree
Showing 7 changed files with 60 additions and 40 deletions.
26 changes: 13 additions & 13 deletions R/gutenberg_download.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,19 +26,19 @@
#' }
#'
#' @examplesIf interactive()
#' # download The Count of Monte Cristo
#' gutenberg_download(1184)
#'
#' # download two books: Wuthering Heights and Jane Eyre
#' books <- gutenberg_download(c(768, 1260), meta_fields = "title")
#' books
#' dplyr::count(books, title)
#'
#' # download all books from Jane Austen
#' austen <- gutenberg_works(author == "Austen, Jane") |>
#' gutenberg_download(meta_fields = "title")
#' austen
#' dplyr::count(austen, title)
#' # download The Count of Monte Cristo
#' gutenberg_download(1184)
#'
#' # download two books: Wuthering Heights and Jane Eyre
#' books <- gutenberg_download(c(768, 1260), meta_fields = "title")
#' books
#' dplyr::count(books, title)
#'
#' # download all books from Jane Austen
#' austen <- gutenberg_works(author == "Austen, Jane") |>
#' gutenberg_download(meta_fields = "title")
#' austen
#' dplyr::count(austen, title)
#'
#' @export
gutenberg_download <- function(gutenberg_id,
Expand Down
19 changes: 11 additions & 8 deletions R/gutenberg_mirrors.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ gutenberg_get_mirror <- function(verbose = TRUE) {
harvest_url <- "https://www.gutenberg.org/robot/harvest"
maybe_message(
verbose,
"Determining mirror for Project Gutenberg from {harvest_url}."
"Determining mirror for Project Gutenberg from {harvest_url}.",
class = "mirror-finding"
)
wget_url <- glue::glue("{harvest_url}?filetypes[]=txt")
lines <- read_url(wget_url)
Expand All @@ -32,13 +33,12 @@ gutenberg_get_mirror <- function(verbose = TRUE) {

# parse and leave out the path
parsed <- urltools::url_parse(mirror_full_url)
if (parsed$domain == "www.gutenberg.lib.md.us") {
# Broken mirror. PG has been contacted. For now, replace:
parsed$domain <- "aleph.gutenberg.org" # nocov
}

mirror <- unclass(glue::glue_data(parsed, "{scheme}://{domain}"))
maybe_message(verbose, "Using mirror {mirror}.")
maybe_message(
verbose,
"Using mirror {mirror}.",
class = "mirror-found"
)

# set option for next time
options(gutenberg_mirror = mirror)
Expand All @@ -48,7 +48,10 @@ gutenberg_get_mirror <- function(verbose = TRUE) {

#' Get all mirror data from Project Gutenberg
#'
#' Get all the mirror data from \url{https://www.gutenberg.org/MIRRORS.ALL}
#' Get all mirror data from \url{https://www.gutenberg.org/MIRRORS.ALL}. This
#' only includes mirrors reported to Project Gutenberg and verified to be
#' relatively stable. For more information on mirroring and getting your own
#' mirror listed, see \url{https://www.gutenberg.org/help/mirroring.html}.
#'
#' @return A tbl_df of Project Gutenberg mirrors and related data
#' \describe{
Expand Down
2 changes: 1 addition & 1 deletion R/gutenberg_strip.R
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ gutenberg_strip <- function(text) {
)
while (
length(text) > 0 &&
stringr::str_detect(stringr::str_to_lower(text[1]), start_paragraph_regex)
stringr::str_detect(stringr::str_to_lower(text[1]), start_paragraph_regex)
) {
# get rid of that paragraph, then the following whitespace
text <- discard_start_while(text, text != "")
Expand Down
13 changes: 11 additions & 2 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,17 @@ discard_end_while <- function(.x, .p) {
rev(discard_start_while(rev(.x), rev(.p)))
}

maybe_message <- function(verbose, message, ..., call = rlang::caller_env()) {
maybe_message <- function(verbose,
message,
class = NULL,
...,
call = rlang::caller_env()) {
if (verbose) {
cli::cli_inform(message, ..., .envir = call)
if (length(class)) {
class <- paste0("gutenbergr-msg-", class)
} else {
class <- "gutenbergr-msg"
}
cli::cli_inform(message, class = class, ..., .envir = call)
}
}
22 changes: 11 additions & 11 deletions man/gutenberg_download.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion man/gutenberg_get_all_mirrors.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 9 additions & 4 deletions tests/testthat/test-gutenberg_mirrors.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,16 @@ test_that("gutenberg_get_mirror works with no option set", {
local_dl_and_read()
withr::local_options(gutenberg_mirror = NULL)
expect_message(
expect_identical(
gutenberg_get_mirror(),
"http://aleph.gutenberg.org"
expect_message(
expect_identical(
gutenberg_get_mirror(),
"http://aleph.gutenberg.org"
),
"Determining mirror",
class = "gutenbergr-msg-mirror-finding"
),
"Determining mirror"
"Using mirror",
class = "gutenbergr-msg-mirror-found"
)
expect_no_message(
expect_identical(
Expand Down

0 comments on commit 821ea47

Please sign in to comment.