From 74c641ca1dc56aa83655ac5305320390a2e021ad Mon Sep 17 00:00:00 2001 From: Alexander Rossell Hayes Date: Wed, 15 Nov 2023 16:09:09 -0800 Subject: [PATCH 01/33] chore: rename `validate_key.R` to `utils_validate_key.R` --- R/{validate_key.R => utils_validate_key.R} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename R/{validate_key.R => utils_validate_key.R} (100%) diff --git a/R/validate_key.R b/R/utils_validate_key.R similarity index 100% rename from R/validate_key.R rename to R/utils_validate_key.R From 5eca735cef28ede11679864235ab1e33f870cfe4 Mon Sep 17 00:00:00 2001 From: Alexander Rossell Hayes Date: Tue, 28 Nov 2023 12:53:31 -0800 Subject: [PATCH 02/33] feat: add `state_fips` dataset --- DESCRIPTION | 3 ++- R/utils_state_fips.R | 14 ++++++++++++++ data-raw/state_fips.R | 10 ++++++++++ data/state_fips.rda | Bin 0 -> 831 bytes man/state_fips.Rd | 25 +++++++++++++++++++++++++ 5 files changed, 51 insertions(+), 1 deletion(-) create mode 100644 R/utils_state_fips.R create mode 100644 data-raw/state_fips.R create mode 100644 data/state_fips.rda create mode 100644 man/state_fips.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 1cf9074..82ef697 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -42,7 +42,8 @@ Imports: rlang Suggests: covr, - testthat (>= 3.0.0) + testthat (>= 3.0.0), + tidycensus LinkingTo: Rcpp, RcppArmadillo diff --git a/R/utils_state_fips.R b/R/utils_state_fips.R new file mode 100644 index 0000000..0b2446e --- /dev/null +++ b/R/utils_state_fips.R @@ -0,0 +1,14 @@ +#' Dataset with FIPS codes for US states +#' +#' Dataset including FIPS codes and postal abbreviations for each U.S. state, +#' district, and territory. +#' +#' @format +#' A tibble with 57 rows and 3 columns: +#' \describe{ +#' \item{`state`}{Two-letter postal abbreviation} +#' \item{`state_code`}{Two-digit FIPS code} +#' \item{`state_name`}{English name} +#' } +#' @source Derived from [tidycensus::fips_codes()] +"state_fips" diff --git a/data-raw/state_fips.R b/data-raw/state_fips.R new file mode 100644 index 0000000..ff2fb7c --- /dev/null +++ b/data-raw/state_fips.R @@ -0,0 +1,10 @@ +library(dplyr) +library(tibble) +library(tidycensus) + +state_fips <- tidycensus::fips_codes |> + tibble::as_tibble() |> + dplyr::distinct(state, state_code, state_name) |> + tibble::remove_rownames() + +usethis::use_data(state_fips, overwrite = TRUE) diff --git a/data/state_fips.rda b/data/state_fips.rda new file mode 100644 index 0000000000000000000000000000000000000000..5ee00700e775e87bca7140919c17c1b91cdbe5b8 GIT binary patch literal 831 zcmV-F1Hk-3T4*^jL0KkKS#y&TegFfS|HS|QOh`Zhf8a0w-|)Zh-|#>H0Du4izyjTl z-JHPahNhZmngOU7srsIVk~GokX!Sg!(rD2AQ$QY}r=$pprklz^s(K?p4H$sY>H`GG z0MGyiqLcj5R0AM127nm>gfeNS2+^`enHWf>sEm5OSdj3`q;nV5(YNjdETNk}`APTH!fxgBzI)}`G_S+EzcV43e$uKPm#fX4*gR{<|M23bIK z%SGL0NX}(!Mtbf<@`Ursi8}Dm5`iDBcKDsfDH3^3SfX@U?1SrMdLlLt(MD-``dN8Z zhdvEz2xnN<%GSi+gZ2i5rPMb)juqL<=5HM zfsx>gCaeasBI6{zJ*#)qSCRu=`os}?I0Lr<{ zah+`{!8_BY;S*4mD2Z^Im6Fw^lO}uo_Uo6U<-(>3MYB$!EvF2*YugxJKr@_tc^ct-;K5fwuQ1o*#SjLTx^auZ zzOsEy5q~0fL$dPR5{OXXbknB=cB;~vnUOTCjDiq@EQ53|iB`jG(P??5Pd5j|_Y*r=asH!NFvHRNN~s=^Bdid{<-T*Bi1?il~Y+>uTc JBpl?#pMV3_i&y{v literal 0 HcmV?d00001 diff --git a/man/state_fips.Rd b/man/state_fips.Rd new file mode 100644 index 0000000..23a6fc2 --- /dev/null +++ b/man/state_fips.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils_state_fips.R +\docType{data} +\name{state_fips} +\alias{state_fips} +\title{Dataset with FIPS codes for US states} +\format{ +A tibble with 57 rows and 3 columns: +\describe{ +\item{\code{state}}{Two-letter postal abbreviation} +\item{\code{state_code}}{Two-digit FIPS code} +\item{\code{state_name}}{English name} +} +} +\source{ +Derived from \code{\link[tidycensus:fips_codes]{tidycensus::fips_codes()}} +} +\usage{ +state_fips +} +\description{ +Dataset including FIPS codes and postal abbreviations for each U.S. state, +district, and territory. +} +\keyword{datasets} From 1e1b57d4bf19d46cff048d8c58544101db31792d Mon Sep 17 00:00:00 2001 From: Alexander Rossell Hayes Date: Mon, 27 Nov 2023 12:31:28 -0800 Subject: [PATCH 03/33] feat: add `as_fips_code()` helper function --- R/utils_state_fips.R | 36 ++++++++++++++++++++++++++++++++++++ man/as_fips_code.Rd | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+) create mode 100644 man/as_fips_code.Rd diff --git a/R/utils_state_fips.R b/R/utils_state_fips.R index 0b2446e..072846f 100644 --- a/R/utils_state_fips.R +++ b/R/utils_state_fips.R @@ -12,3 +12,39 @@ #' } #' @source Derived from [tidycensus::fips_codes()] "state_fips" + +#' Get the standardized FIPS code associated with a state +#' +#' @param x A [numeric] or [character] vector of state names, +#' postal abbreviations, or FIPS codes. +#' Matches for state names and abbreviations are not case sensitive. +#' FIPS codes may be matched from numeric or character vectors, +#' with or without leading zeroes. +#' +#' @return A [character] vector of two-digit FIPS codes. +#' One-digit FIPS codes are prefixed with a leading zero, +#' e.g., `"06"` for California. +#' +#' @examples +#' as_fips_code("california") +#' +#' # Character vector matches ignore case +#' as_fips_code(c("DC", "Md", "va")) +#' +#' # Note that `3` and `7` are standardized to `NA`, +#' # because no state is assigned those FIPS codes +#' as_fips_code(1:10) +#' +#' # You can even mix methods in the same vector +#' as_fips_code(c("utah", "NM", 8, "04")) +#' +#' @keywords internal +as_fips_code <- function(x) { + state_fips$state_code[ + dplyr::coalesce( + match(toupper(x), state_fips$state), + match(tolower(x), tolower(state_fips$state_name)), + match(suppressWarnings(as.numeric(x)), as.numeric(state_fips$state_code)) + ) + ] +} diff --git a/man/as_fips_code.Rd b/man/as_fips_code.Rd new file mode 100644 index 0000000..747a01e --- /dev/null +++ b/man/as_fips_code.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils_state_fips.R +\name{as_fips_code} +\alias{as_fips_code} +\title{Get the standardized FIPS code associated with a state} +\usage{ +as_fips_code(x) +} +\arguments{ +\item{x}{A \link{numeric} or \link{character} vector of state names, +postal abbreviations, or FIPS codes. +Matches for state names and abbreviations are not case sensitive. +FIPS codes may be matched from numeric or character vectors, +with or without leading zeroes.} +} +\value{ +A \link{character} vector of two-digit FIPS codes. +One-digit FIPS codes are prefixed with a leading zero, +e.g., \code{"06"} for California. +} +\description{ +Get the standardized FIPS code associated with a state +} +\examples{ +as_fips_code("california") + +# Character vector matches ignore case +as_fips_code(c("DC", "Md", "va")) + +# Note that `3` and `7` are standardized to `NA`, +# because no state is assigned those FIPS codes +as_fips_code(1:10) + +# You can even mix methods in the same vector +as_fips_code(c("utah", "NM", 8, "04")) + +} +\keyword{internal} From b58298083175a74fb2e8f9fc6a23fbfb48194784 Mon Sep 17 00:00:00 2001 From: Alexander Rossell Hayes Date: Fri, 17 Nov 2023 12:38:36 -0800 Subject: [PATCH 04/33] refactor(census_geo_api): use `as_fips_code()` --- R/census_geo_api.R | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/R/census_geo_api.R b/R/census_geo_api.R index 7e9c295..f2597b4 100644 --- a/R/census_geo_api.R +++ b/R/census_geo_api.R @@ -64,21 +64,7 @@ census_geo_api <- function( state <- toupper(state) df.out <- NULL - # Building fips table (previously loaded via .rda) - fips.codes <- structure(list(State = structure(1:55, levels = c("AK", "AL", - "AR", "AS", "AZ", "CA", "CO", "CT", "DC", "DE", "FL", "GA", "GU", - "HI", "IA", "ID", "IL", "IN", "KS", "KY", "LA", "MA", "MD", "ME", - "MI", "MN", "MO", "MS", "MT", "NC", "ND", "NE", "NH", "NJ", "NM", - "NV", "NY", "OH", "OK", "OR", "PA", "PR", "RI", "SC", "SD", "TN", - "TX", "UT", "VA", "VI", "VT", "WA", "WI", "WV", "WY"), class = "factor"), - FIPS = c(2L, 1L, 5L, 60L, 4L, 6L, 8L, 9L, 11L, 10L, 12L, - 13L, 66L, 15L, 19L, 16L, 17L, 18L, 20L, 21L, 22L, 25L, 24L, - 23L, 26L, 27L, 29L, 28L, 30L, 37L, 38L, 31L, 33L, 34L, 35L, - 32L, 36L, 39L, 40L, 41L, 42L, 72L, 44L, 45L, 46L, 47L, 48L, - 49L, 51L, 78L, 50L, 53L, 55L, 54L, 56L)), class = "data.frame", row.names = c(NA, - -55L)) - state.fips <- fips.codes[fips.codes$State == state, "FIPS"] - state.fips <- ifelse(nchar(state.fips) == 1, paste0("0", state.fips), state.fips) + state.fips <- as_fips_code(state) # if (age == F & sex == F) { # num <- ifelse(3:10 != 10, paste("0", 3:10, sep = ""), "10") From 975fa1c71b6d986d30aecf70d5c86a654285e47c Mon Sep 17 00:00:00 2001 From: Alexander Rossell Hayes Date: Tue, 28 Nov 2023 12:55:01 -0800 Subject: [PATCH 05/33] refactor(census_geo_api): enumerate possible options for `geo` argument (see https://design.tidyverse.org/enumerate-options.html) --- R/census_geo_api.R | 4 +++- man/census_geo_api.Rd | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/R/census_geo_api.R b/R/census_geo_api.R index f2597b4..fa3ed9a 100644 --- a/R/census_geo_api.R +++ b/R/census_geo_api.R @@ -50,7 +50,7 @@ census_geo_api <- function( key = Sys.getenv("CENSUS_API_KEY"), state, - geo = "tract", + geo = c("tract", "block", "block_group", "county", "place"), age = FALSE, sex = FALSE, year = "2020", @@ -60,6 +60,8 @@ census_geo_api <- function( ) { validate_key(key) + geo <- rlang::arg_match(geo) + census <- NULL state <- toupper(state) diff --git a/man/census_geo_api.Rd b/man/census_geo_api.Rd index 548d57c..7f8b5a4 100644 --- a/man/census_geo_api.Rd +++ b/man/census_geo_api.Rd @@ -7,7 +7,7 @@ census_geo_api( key = Sys.getenv("CENSUS_API_KEY"), state, - geo = "tract", + geo = c("tract", "block", "block_group", "county", "place"), age = FALSE, sex = FALSE, year = "2020", From 4a99c87a2cc20a4e714f0f33a7ab9c539d5ebce8 Mon Sep 17 00:00:00 2001 From: Alexander Rossell Hayes Date: Wed, 22 Nov 2023 12:18:04 -0800 Subject: [PATCH 06/33] chore: add `.DS_Store` to `.gitignore` --- .gitignore | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 4e2c122..dc83fda 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +.DS_Store + # History files .Rhistory .Rapp.history @@ -21,4 +23,4 @@ vignettes/*.pdf src/RcppExports.o src/aux_funs.o src/sample_me.o -src/wru.so \ No newline at end of file +src/wru.so From 33e9b0041cc0dc864270a154b7f96ee300daba5e Mon Sep 17 00:00:00 2001 From: Alexander Rossell Hayes Date: Wed, 22 Nov 2023 12:18:23 -0800 Subject: [PATCH 07/33] refactor(census_geo_api): enumerate possible values for `year` argument (see https://design.tidyverse.org/enumerate-options.html) --- R/census_geo_api.R | 16 +++++++++------- man/census_geo_api.Rd | 7 ++++--- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/R/census_geo_api.R b/R/census_geo_api.R index fa3ed9a..45fb20c 100644 --- a/R/census_geo_api.R +++ b/R/census_geo_api.R @@ -53,7 +53,7 @@ census_geo_api <- function( geo = c("tract", "block", "block_group", "county", "place"), age = FALSE, sex = FALSE, - year = "2020", + year = c("2020", "2010"), retry = 3, save_temp = NULL, counties = NULL @@ -62,6 +62,9 @@ census_geo_api <- function( geo <- rlang::arg_match(geo) + year <- as.character(year) + year <- rlang::arg_match(year) + census <- NULL state <- toupper(state) @@ -115,12 +118,11 @@ census_geo_api <- function( } # set the census data url links - if (as.character(year) != "2020") { - census_data_url = "https://api.census.gov/data/2010/dec/sf1?" - } - else { - census_data_url = "https://api.census.gov/data/2020/dec/pl?" - } + census_data_url <- switch( + as.character(year), + "2010" = "https://api.census.gov/data/2010/dec/sf1?", + "2020" = "https://api.census.gov/data/2020/dec/pl?" + ) if (geo == "place") { geo.merge <- c("state", "place") diff --git a/man/census_geo_api.Rd b/man/census_geo_api.Rd index 7f8b5a4..c16b748 100644 --- a/man/census_geo_api.Rd +++ b/man/census_geo_api.Rd @@ -7,13 +7,14 @@ census_geo_api( key = Sys.getenv("CENSUS_API_KEY"), state, - geo = c("tract", "block", "block_group", "county", "place"), + geo = c("tract", "block", "block_group", "county", "place", "zcta"), age = FALSE, sex = FALSE, - year = "2020", + year = c(2020, 2010), retry = 3, save_temp = NULL, - counties = NULL + counties = NULL, + zctas = NULL ) } \arguments{ From 06347cad11425a2c283fc1ad6d45ba6f6dc91a75 Mon Sep 17 00:00:00 2001 From: Alexander Rossell Hayes Date: Wed, 22 Nov 2023 14:46:32 -0800 Subject: [PATCH 08/33] feat: add `assert_boolean()` helper function --- R/utils_assert.R | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 R/utils_assert.R diff --git a/R/utils_assert.R b/R/utils_assert.R new file mode 100644 index 0000000..8d9e157 --- /dev/null +++ b/R/utils_assert.R @@ -0,0 +1,37 @@ +assert_boolean <- function( + x, + argument_name = rlang::caller_arg(x), + call = rlang::caller_call() +) { + if (length(x) != 1) { + cli::cli_abort( + c( + "{.arg {argument_name}} must be a {.code TRUE} or {.code FALSE} value of length {.val {1}}.", + x = "{.arg {argument_name}} has a length of {.val {length(x)}}." + ), + call = call + ) + } + + if (!inherits(x, "logical")) { + cli::cli_abort( + c( + "{.arg {argument_name}} must be a {.class logical} {.code TRUE} or {.code FALSE} value.", + x = "{.arg {argument_name}} is an object of class {.cls {class(x)}}." + ), + call = call + ) + } + + if (!x %in% c(TRUE, FALSE)) { + cli::cli_abort( + c( + "{.arg {argument_name}} must be {.code TRUE} or {.code FALSE}.", + x = "{.arg {argument_name}} is {.val {x}}." + ), + call = call + ) + } + + x +} From 34a9faaf85a8953c1b20824f9e5ce0a8942d8379 Mon Sep 17 00:00:00 2001 From: Alexander Rossell Hayes Date: Mon, 27 Nov 2023 13:02:38 -0800 Subject: [PATCH 09/33] feat: add `as_state_abbreviation()` helper function --- R/utils_state_fips.R | 32 ++++++++++++++++++++++++++++---- man/as_fips_code.Rd | 19 +++++++++++++++++-- 2 files changed, 45 insertions(+), 6 deletions(-) diff --git a/R/utils_state_fips.R b/R/utils_state_fips.R index 072846f..7dc5c6f 100644 --- a/R/utils_state_fips.R +++ b/R/utils_state_fips.R @@ -13,7 +13,7 @@ #' @source Derived from [tidycensus::fips_codes()] "state_fips" -#' Get the standardized FIPS code associated with a state +#' Convert between state names, postal abbreviations, and FIPS codes #' #' @param x A [numeric] or [character] vector of state names, #' postal abbreviations, or FIPS codes. @@ -21,22 +21,35 @@ #' FIPS codes may be matched from numeric or character vectors, #' with or without leading zeroes. #' -#' @return A [character] vector of two-digit FIPS codes. -#' One-digit FIPS codes are prefixed with a leading zero, -#' e.g., `"06"` for California. +#' @return +#' \describe{ +#' \item{`as_state_fips_code()`}{ +#' A [character] vector of two-digit FIPS codes. +#' One-digit FIPS codes are prefixed with a leading zero, +#' e.g., `"06"` for California. +#' } +#' \item{`as_state_abbreviation()`}{ +#' A [character] vector of two-letter postal abbreviations, +#' e.g., `"CA"` for California. +#' } +#' } #' #' @examples #' as_fips_code("california") +#' as_state_abbreviation("california") #' #' # Character vector matches ignore case #' as_fips_code(c("DC", "Md", "va")) +#' as_state_abbreviation(c("district of columbia", "Maryland", "VIRGINIA")) #' #' # Note that `3` and `7` are standardized to `NA`, #' # because no state is assigned those FIPS codes #' as_fips_code(1:10) +#' as_state_abbreviation(1:10) #' #' # You can even mix methods in the same vector #' as_fips_code(c("utah", "NM", 8, "04")) +#' as_state_abbreviation(c("utah", "NM", 8, "04")) #' #' @keywords internal as_fips_code <- function(x) { @@ -48,3 +61,14 @@ as_fips_code <- function(x) { ) ] } + +#' @rdname as_fips_code +as_state_abbreviation <- function(x) { + state_fips$state[ + dplyr::coalesce( + match(toupper(x), state_fips$state), + match(tolower(x), tolower(state_fips$state_name)), + match(suppressWarnings(as.numeric(x)), as.numeric(state_fips$state_code)) + ) + ] +} \ No newline at end of file diff --git a/man/as_fips_code.Rd b/man/as_fips_code.Rd index 747a01e..1e2c60c 100644 --- a/man/as_fips_code.Rd +++ b/man/as_fips_code.Rd @@ -2,9 +2,12 @@ % Please edit documentation in R/utils_state_fips.R \name{as_fips_code} \alias{as_fips_code} -\title{Get the standardized FIPS code associated with a state} +\alias{as_state_abbreviation} +\title{Convert between state names, postal abbreviations, and FIPS codes} \usage{ as_fips_code(x) + +as_state_abbreviation(x) } \arguments{ \item{x}{A \link{numeric} or \link{character} vector of state names, @@ -14,25 +17,37 @@ FIPS codes may be matched from numeric or character vectors, with or without leading zeroes.} } \value{ +\describe{ +\item{\code{as_state_fips_code()}}{ A \link{character} vector of two-digit FIPS codes. One-digit FIPS codes are prefixed with a leading zero, e.g., \code{"06"} for California. } +\item{\code{as_state_abbreviation()}}{ +A \link{character} vector of two-letter postal abbreviations, +e.g., \code{"CA"} for California. +} +} +} \description{ -Get the standardized FIPS code associated with a state +Convert between state names, postal abbreviations, and FIPS codes } \examples{ as_fips_code("california") +as_state_abbreviation("california") # Character vector matches ignore case as_fips_code(c("DC", "Md", "va")) +as_state_abbreviation(c("district of columbia", "Maryland", "VIRGINIA")) # Note that `3` and `7` are standardized to `NA`, # because no state is assigned those FIPS codes as_fips_code(1:10) +as_state_abbreviation(1:10) # You can even mix methods in the same vector as_fips_code(c("utah", "NM", 8, "04")) +as_state_abbreviation(c("utah", "NM", 8, "04")) } \keyword{internal} From f47576e48bff8be5e5fb7317452ac6d4c37e8e29 Mon Sep 17 00:00:00 2001 From: Alexander Rossell Hayes Date: Sat, 18 Nov 2023 20:34:32 -0800 Subject: [PATCH 10/33] feat: add `census_geo_api_zcta()` function --- R/census_geo_api_zcta.R | 186 +++++++++++++++++++++++++++++++++++++ R/staticimports.R | 57 ++++++++++++ man/census_geo_api_zcta.Rd | 57 ++++++++++++ 3 files changed, 300 insertions(+) create mode 100644 R/census_geo_api_zcta.R create mode 100644 R/staticimports.R create mode 100644 man/census_geo_api_zcta.Rd diff --git a/R/census_geo_api_zcta.R b/R/census_geo_api_zcta.R new file mode 100644 index 0000000..8ccdc5d --- /dev/null +++ b/R/census_geo_api_zcta.R @@ -0,0 +1,186 @@ +# @staticimports pkg:stringstatic +# str_pad + +#' Census download function for state-ZCTA-level data +#' +#' @inheritParams census_geo_api +#' @param ... These dots are for future extensions and must be empty. +#' +#' @return A [data.frame] with columns +#' `state`, `zcta`, `r_whi`, `r_bla`, `r_his`, `r_asi`, and `r_oth`. +#' +#' @examplesIf nzchar(Sys.getenv("CENSUS_API_KEY")) +#' \dontrun{census_geo_api_zcta(state = c("DE", "NJ")} +#' \dontrun{census_geo_api_zcta(state = "FL", age = TRUE, sex = TRUE)} +#' \dontrun{census_geo_api_zcta(state = "MA", age = FALSE, sex = FALSE, year = "2020")} +#' +#' @keywords internal +census_geo_api_zcta <- function( + state, + ..., + age = FALSE, + sex = FALSE, + year = c("2020", "2010", "2000"), + retry = 3, + key = Sys.getenv("CENSUS_API_KEY") +) { + # Validate arguments + rlang::check_dots_empty() + validate_key(key) + assert_boolean(age) + assert_boolean(sex) + year <- as.character(year) + year <- rlang::arg_match(year) + + if (year == "2020") { + census_data_url <- "https://api.census.gov/data/2020/dec/dhc?" + prefix <- "P12" + separator <- "_" + suffix <- "N" + } else if (year %in% c("2010", "2000")) { + census_data_url <- paste0("https://api.census.gov/data/", year, "/dec/sf1?") + prefix <- "P012" + separator <- "" + suffix <- "" + } + + race_codes <- list( + "whi" = "I", + "bla" = "B", + "his" = "H", + "asi" = c("D", "E"), + "oth" = c("C", "F", "G") + ) + + sex_codes <- c("mal" = 2, "fem" = 26) + + age_codes <- 1:23 + + numeric_codes <- if (age) { + unlist(purrr::map(sex_codes, function(x) x + age_codes)) + } else if (sex) { + sex_codes + } else { + 1 + } + numeric_codes <- str_pad(numeric_codes, width = 3, side = "left", pad = "0") + + vars <- expand.grid( + prefix, + unlist(race_codes), + separator, + numeric_codes, + suffix, + stringsAsFactors = FALSE + ) + vars <- apply(vars, 1, paste, collapse = "") + + region <- paste0( + "for=zip%20code%20tabulation%20area%20(or%20part):*&in=state:", + paste(as_fips_code(state), collapse = ",") + ) + + census <- get_census_api( + census_data_url, + key = key, + var.names = vars, + region = region, + retry + ) + + if (!age && !sex) { + ## Calculate Pr(Geolocation | Race) + + for (i in seq_along(race_codes)) { + var_name <- paste("r", names(race_codes)[[i]], sep = "_") + + code <- paste0(prefix, race_codes[[i]], separator, "001", suffix) + + census[var_name] <- rowSums(census[code]) + } + } else if (!age && sex) { + ## Calculate Pr(Geolocation, Sex | Race) + + for (race in seq_along(race_codes)) { + for (sex in seq_along(sex_codes)) { + var_name <- paste( + "r", + names(sex_codes)[[sex]], + names(race_codes)[[race]], + sep = "_" + ) + + code <- paste0( + prefix, + race_codes[[race]], + separator, + str_pad(sex_codes[[sex]], width = 3, pad = "0"), + suffix + ) + + census[var_name] <- rowSums(census[code]) + } + } + } else if (age && !sex) { + ## Calculate Pr(Geolocation, Age Category | Race) + + for (race in seq_along(race_codes)) { + for (age_category in age_codes) { + var_name <- paste( + "r", + age_category, + names(race_codes)[[race]], + sep = "_" + ) + + code <- paste0( + prefix, + race_codes[[race]], + separator, + str_pad(sex_codes + age_category, width = 3, pad = "0"), + suffix + ) + + census[var_name] <- rowSums(census[code]) + } + } + } else if (age && sex) { + ## Calculate Pr(Geolocation, Sex, Age Category | Race) + + for (race in seq_along(race_codes)) { + for (age_category in age_codes) { + for (sex in seq_along(sex_codes)) { + var_name <- paste( + "r", + names(sex_codes)[[sex]], + age_category, + names(race_codes)[[race]], + sep = "_" + ) + + code <- paste0( + prefix, + race_codes[[race]], + separator, + str_pad(sex_codes[[sex]] + age_category, width = 3, pad = "0"), + suffix + ) + + census[var_name] <- rowSums(census[code]) + } + } + } + } + + census <- dplyr::group_by(census, dplyr::across(dplyr::any_of("state"))) + census <- dplyr::mutate( + census, + state = as_state_abbreviation(state), + dplyr::across(dplyr::starts_with("r_"), function(x) x / sum(x)) + ) + census <- dplyr::ungroup(census) + + names(census)[[2]] <- "zcta" + + census +} diff --git a/R/staticimports.R b/R/staticimports.R new file mode 100644 index 0000000..dbb1efb --- /dev/null +++ b/R/staticimports.R @@ -0,0 +1,57 @@ +# Generated by staticimports; do not edit by hand. +# ====================================================================== +# Imported from pkg:stringstatic +# ====================================================================== + +#' Duplicate and concatenate strings within a character vector +#' +#' Dependency-free drop-in alternative for `stringr::str_pad()`. +#' +#' @author Eli Pousson \email{eli.pousson@gmail.com} +#' ([ORCID](https://orcid.org/0000-0001-8280-1706)) +#' +#' Alexander Rossell Hayes \email{alexander@rossellhayes.com} +#' ([ORCID](https://orcid.org/0000-0001-9412-0457)) +#' +#' @source Adapted from the [stringr](https://stringr.tidyverse.org/) package. +#' +#' @param string Input vector. +#' Either a character vector, or something coercible to one. +#' @param width Minimum width of padded strings. +#' @param side Side on which padding character is added (left, right or both). +#' @param pad Single padding character (default is a space). +#' @param use_width If `FALSE`, +#' use the length of the string instead of the width; +#' see [str_width()]/[str_length()] for the difference. +#' +#' @return A character vector. +#' @noRd +str_pad <- function( + string, width, side = c("left", "right", "both"), pad = " ", use_width = TRUE +) { + if (!is.numeric(width)) { + return(string[NA]) + } + + if (any(nchar(pad, type = "width") != 1)) { + stop("each string in `pad` should consist of code points of total width 1") + } + + side <- match.arg(side) + + nchar_type <- if (isTRUE(use_width)) "width" else "chars" + string_width <- nchar(string, nchar_type) + pad_width <- width - string_width + pad_width[pad_width < 0] <- 0 + + switch( + side, + "left" = paste0(strrep(pad, pad_width), string), + "right" = paste0(string, strrep(pad, pad_width)), + "both" = paste0( + strrep(pad, floor(pad_width / 2)), + string, + strrep(pad, ceiling(pad_width / 2)) + ) + ) +} diff --git a/man/census_geo_api_zcta.Rd b/man/census_geo_api_zcta.Rd new file mode 100644 index 0000000..76ea2ef --- /dev/null +++ b/man/census_geo_api_zcta.Rd @@ -0,0 +1,57 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/census_geo_api_zcta.R +\name{census_geo_api_zcta} +\alias{census_geo_api_zcta} +\title{Census download function for state-ZCTA-level data} +\usage{ +census_geo_api_zcta( + state, + ..., + age = FALSE, + sex = FALSE, + year = c("2020", "2010", "2000"), + retry = 3, + key = Sys.getenv("CENSUS_API_KEY") +) +} +\arguments{ +\item{state}{A required character object specifying which state to extract Census data for, +e.g., \code{"NJ"}.} + +\item{...}{These dots are for future extensions and must be empty.} + +\item{age}{A \code{TRUE}/\code{FALSE} object indicating whether to condition on +age or not. If \code{FALSE} (default), function will return Pr(Geolocation | Race). +If \code{TRUE}, function will return Pr(Geolocation, Age | Race). +If \code{\var{sex}} is also \code{TRUE}, function will return Pr(Geolocation, Age, Sex | Race).} + +\item{sex}{A \code{TRUE}/\code{FALSE} object indicating whether to condition on +sex or not. If \code{FALSE} (default), function will return Pr(Geolocation | Race). +If \code{TRUE}, function will return Pr(Geolocation, Sex | Race). +If \code{\var{age}} is also \code{TRUE}, function will return Pr(Geolocation, Age, Sex | Race).} + +\item{year}{A character object specifying the year of U.S. Census data to be downloaded. +Use \code{"2010"}, or \code{"2020"}. Default is \code{"2020"}. +Warning: 2020 U.S. Census data is downloaded only when \code{\var{age}} and +\code{\var{sex}} are both \code{FALSE}.} + +\item{retry}{The number of retries at the census website if network interruption occurs.} + +\item{key}{A required character object. Must contain user's Census API +key, which can be requested \href{https://api.census.gov/data/key_signup.html}{here}.} +} +\value{ +A \link{data.frame} with columns +\code{state}, \code{zcta}, \code{r_whi}, \code{r_bla}, \code{r_his}, \code{r_asi}, and \code{r_oth}. +} +\description{ +Census download function for state-ZCTA-level data +} +\examples{ +\dontshow{if (nzchar(Sys.getenv("CENSUS_API_KEY"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontrun{census_geo_api_zcta(state = c("DE", "NJ")} +\dontrun{census_geo_api_zcta(state = "FL", age = TRUE, sex = TRUE)} +\dontrun{census_geo_api_zcta(state = "MA", age = FALSE, sex = FALSE, year = "2020")} +\dontshow{\}) # examplesIf} +} +\keyword{internal} From 0a4ec05bffbdc4f2a59452b8cc1704bd1ac7cf91 Mon Sep 17 00:00:00 2001 From: Alexander Rossell Hayes Date: Sat, 18 Nov 2023 20:34:32 -0800 Subject: [PATCH 11/33] feat(census_geo_api): support `geo = "zcta"` by passing to `census_geo_api_zcta()` --- R/census_geo_api.R | 18 ++++++++++++++++-- man/census_geo_api.Rd | 7 +++---- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/R/census_geo_api.R b/R/census_geo_api.R index 45fb20c..1c64d22 100644 --- a/R/census_geo_api.R +++ b/R/census_geo_api.R @@ -9,7 +9,7 @@ #' @param state A required character object specifying which state to extract Census data for, #' e.g., \code{"NJ"}. #' @param geo A character object specifying what aggregation level to use. -#' Use \code{"county"}, \code{"tract"},\code{"block_group"}, \code{"block"}, or \code{"place"}. +#' Use `"block"`, `"block_group"`, `"county"`, `"place"`, `"tract"`, or `"zcta"`. #' Default is \code{"tract"}. Warning: extracting block-level data takes very long. #' @param age A \code{TRUE}/\code{FALSE} object indicating whether to condition on #' age or not. If \code{FALSE} (default), function will return Pr(Geolocation | Race). @@ -50,7 +50,7 @@ census_geo_api <- function( key = Sys.getenv("CENSUS_API_KEY"), state, - geo = c("tract", "block", "block_group", "county", "place"), + geo = c("tract", "block", "block_group", "county", "place", "zcta"), age = FALSE, sex = FALSE, year = c("2020", "2010"), @@ -60,8 +60,22 @@ census_geo_api <- function( ) { validate_key(key) + geo <- tolower(geo) geo <- rlang::arg_match(geo) + if (geo == "zcta") { + return( + census_geo_api_zcta( + state = state, + age = age, + sex = sex, + year = year, + retry = retry, + key = key + ) + ) + } + year <- as.character(year) year <- rlang::arg_match(year) diff --git a/man/census_geo_api.Rd b/man/census_geo_api.Rd index c16b748..ceab108 100644 --- a/man/census_geo_api.Rd +++ b/man/census_geo_api.Rd @@ -10,11 +10,10 @@ census_geo_api( geo = c("tract", "block", "block_group", "county", "place", "zcta"), age = FALSE, sex = FALSE, - year = c(2020, 2010), + year = c("2020", "2010"), retry = 3, save_temp = NULL, - counties = NULL, - zctas = NULL + counties = NULL ) } \arguments{ @@ -29,7 +28,7 @@ By default, attempts to find a census key stored in an e.g., \code{"NJ"}.} \item{geo}{A character object specifying what aggregation level to use. -Use \code{"county"}, \code{"tract"},\code{"block_group"}, \code{"block"}, or \code{"place"}. +Use \code{"block"}, \code{"block_group"}, \code{"county"}, \code{"place"}, \code{"tract"}, or \code{"zcta"}. Default is \code{"tract"}. Warning: extracting block-level data takes very long.} \item{age}{A \code{TRUE}/\code{FALSE} object indicating whether to condition on From 45618545a234d00862bfb3999aab33ef0ff34dbf Mon Sep 17 00:00:00 2001 From: Alexander Rossell Hayes Date: Tue, 28 Nov 2023 13:29:40 -0800 Subject: [PATCH 12/33] feat(get_census_data): add support for `census.geo = "zcta"` --- R/get_census_data.R | 33 ++++++++++++++++++++++++--------- man/get_census_data.Rd | 2 +- 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/R/get_census_data.R b/R/get_census_data.R index a7f07ff..d4a4d09 100644 --- a/R/get_census_data.R +++ b/R/get_census_data.R @@ -36,21 +36,24 @@ #' #' @export #' -#' @examples +#' @examples #' \dontrun{get_census_data(states = c("NJ", "NY"), age = TRUE, sex = FALSE)} #' \dontrun{get_census_data(states = "MN", age = FALSE, sex = FALSE, year = "2020")} get_census_data <- function( - key = Sys.getenv("CENSUS_API_KEY"), - states, - age = FALSE, - sex = FALSE, - year = "2020", - census.geo = "block", - retry = 3, - county.list = NULL + key = Sys.getenv("CENSUS_API_KEY"), + states, + age = FALSE, + sex = FALSE, + year = "2020", + census.geo = c("tract", "block", "block_group", "county", "place", "zcta"), + retry = 3, + county.list = NULL ) { validate_key(key) + census.geo <- tolower(census.geo) + census.geo <- rlang::arg_match(census.geo) + states <- toupper(states) message("Collecting ", year, " Census data...") @@ -81,6 +84,18 @@ get_census_data <- function( county <- census_geo_api(key, s, geo = "county", age, sex, year, retry) CensusObj[[s]]$county <- county } + + if (census.geo == "zcta") { + if (!is.null(county.list)) { + cli::cli_abort(c( + "The {.arg county.list} argument must be set to {.code NULL} + when {.arg census_geo} is {.val zcta}, + because the Census Bureau does release data that divides ZCTAs by county." + )) + } + + CensusObj[[s]]$zcta <- census_geo_api(key, s, geo = "zcta", age, sex, year, retry) + } } return(CensusObj) } \ No newline at end of file diff --git a/man/get_census_data.Rd b/man/get_census_data.Rd index 4149f29..a3be2b7 100644 --- a/man/get_census_data.Rd +++ b/man/get_census_data.Rd @@ -10,7 +10,7 @@ get_census_data( age = FALSE, sex = FALSE, year = "2020", - census.geo = "block", + census.geo = c("tract", "block", "block_group", "county", "place", "zcta"), retry = 3, county.list = NULL ) From 817c7febb761b0ca67036b19a246f890babb5b45 Mon Sep 17 00:00:00 2001 From: Alexander Rossell Hayes Date: Tue, 28 Nov 2023 13:30:39 -0800 Subject: [PATCH 13/33] docs: rebuild documentation --- man/census_data_preflight.Rd | 1 + man/census_geo_api.Rd | 10 +++++----- man/census_geo_api_zcta.Rd | 8 ++++++-- man/census_helper.Rd | 6 +++--- man/census_helper_new.Rd | 6 +++--- man/get_census_api.Rd | 6 +++--- man/get_census_api_2.Rd | 7 +++---- man/get_census_data.Rd | 6 +++--- man/modfuns.Rd | 14 +++++--------- 9 files changed, 32 insertions(+), 32 deletions(-) diff --git a/man/census_data_preflight.Rd b/man/census_data_preflight.Rd index 9c4dda9..5b656dd 100644 --- a/man/census_data_preflight.Rd +++ b/man/census_data_preflight.Rd @@ -24,6 +24,7 @@ And if \code{\var{census.geo} = "block"}, then \code{\var{voter.file}} must have columns named \code{county}, \code{tract}, and \code{block}. If \code{\var{census.geo} = "place"}, then \code{\var{voter.file}} must have column named \code{place}. +If \code{census.geo = "zcta"}, then \code{voter.file} must have column named \code{zcta}. Specifying \code{\var{census.geo}} will call \code{census_helper} function to merge Census geographic data at specified level of geography.} diff --git a/man/census_geo_api.Rd b/man/census_geo_api.Rd index ceab108..e679fa6 100644 --- a/man/census_geo_api.Rd +++ b/man/census_geo_api.Rd @@ -17,12 +17,12 @@ census_geo_api( ) } \arguments{ -\item{key}{A character string containing a valid Census API key, +\item{key}{A character string containing a valid Census API key, which can be requested from the -[U.S. Census API key signup page](https://api.census.gov/data/key_signup.html). +\href{https://api.census.gov/data/key_signup.html}{U.S. Census API key signup page}. By default, attempts to find a census key stored in an -[environment variable][Sys.getenv] named `CENSUS_API_KEY`.} +\link[=Sys.getenv]{environment variable} named \code{CENSUS_API_KEY}.} \item{state}{A required character object specifying which state to extract Census data for, e.g., \code{"NJ"}.} @@ -75,7 +75,7 @@ at either the county, tract, block, or place level, for a particular state. } \references{ -Relies on `get_census_api()`, `get_census_api_2()`, and `vec_to_chunk()` functions authored by Nicholas Nagle, -available [here](https://rstudio-pubs-static.s3.amazonaws.com/19337_2e7f827190514c569ea136db788ce850.html). +Relies on \code{get_census_api()}, \code{get_census_api_2()}, and \code{vec_to_chunk()} functions authored by Nicholas Nagle, +available \href{https://rstudio-pubs-static.s3.amazonaws.com/19337_2e7f827190514c569ea136db788ce850.html}{here}. } \keyword{internal} diff --git a/man/census_geo_api_zcta.Rd b/man/census_geo_api_zcta.Rd index 76ea2ef..ddcf6bd 100644 --- a/man/census_geo_api_zcta.Rd +++ b/man/census_geo_api_zcta.Rd @@ -37,8 +37,12 @@ Warning: 2020 U.S. Census data is downloaded only when \code{\var{age}} and \item{retry}{The number of retries at the census website if network interruption occurs.} -\item{key}{A required character object. Must contain user's Census API -key, which can be requested \href{https://api.census.gov/data/key_signup.html}{here}.} +\item{key}{A character string containing a valid Census API key, +which can be requested from the +\href{https://api.census.gov/data/key_signup.html}{U.S. Census API key signup page}. + +By default, attempts to find a census key stored in an +\link[=Sys.getenv]{environment variable} named \code{CENSUS_API_KEY}.} } \value{ A \link{data.frame} with columns diff --git a/man/census_helper.Rd b/man/census_helper.Rd index 0432d69..74dcb2a 100644 --- a/man/census_helper.Rd +++ b/man/census_helper.Rd @@ -18,12 +18,12 @@ census_helper( ) } \arguments{ -\item{key}{A character string containing a valid Census API key, +\item{key}{A character string containing a valid Census API key, which can be requested from the -[U.S. Census API key signup page](https://api.census.gov/data/key_signup.html). +\href{https://api.census.gov/data/key_signup.html}{U.S. Census API key signup page}. By default, attempts to find a census key stored in an -[environment variable][Sys.getenv] named `CENSUS_API_KEY`.} +\link[=Sys.getenv]{environment variable} named \code{CENSUS_API_KEY}.} \item{voter.file}{An object of class \code{data.frame}. Must contain field(s) named \code{\var{county}}, \code{\var{tract}}, \code{\var{block}}, and/or \code{\var{place}} diff --git a/man/census_helper_new.Rd b/man/census_helper_new.Rd index d5e88ac..7293d83 100644 --- a/man/census_helper_new.Rd +++ b/man/census_helper_new.Rd @@ -18,12 +18,12 @@ census_helper_new( ) } \arguments{ -\item{key}{A character string containing a valid Census API key, +\item{key}{A character string containing a valid Census API key, which can be requested from the -[U.S. Census API key signup page](https://api.census.gov/data/key_signup.html). +\href{https://api.census.gov/data/key_signup.html}{U.S. Census API key signup page}. By default, attempts to find a census key stored in an -[environment variable][Sys.getenv] named `CENSUS_API_KEY`.} +\link[=Sys.getenv]{environment variable} named \code{CENSUS_API_KEY}.} \item{voter.file}{An object of class \code{data.frame}. Must contain field(s) named \code{\var{county}}, \code{\var{tract}}, \code{\var{block}}, and/or \code{\var{place}} diff --git a/man/get_census_api.Rd b/man/get_census_api.Rd index 2751dc6..c3ebc13 100644 --- a/man/get_census_api.Rd +++ b/man/get_census_api.Rd @@ -16,12 +16,12 @@ get_census_api( \item{data_url}{URL root of the API, e.g., \code{"https://api.census.gov/data/2020/dec/pl"}.} -\item{key}{A character string containing a valid Census API key, +\item{key}{A character string containing a valid Census API key, which can be requested from the -[U.S. Census API key signup page](https://api.census.gov/data/key_signup.html). +\href{https://api.census.gov/data/key_signup.html}{U.S. Census API key signup page}. By default, attempts to find a census key stored in an -[environment variable][Sys.getenv] named `CENSUS_API_KEY`.} +\link[=Sys.getenv]{environment variable} named \code{CENSUS_API_KEY}.} \item{var.names}{A character vector of variables to get, e.g., \code{c("P2_005N", "P2_006N", "P2_007N", "P2_008N")}. diff --git a/man/get_census_api_2.Rd b/man/get_census_api_2.Rd index 937eada..a66146f 100644 --- a/man/get_census_api_2.Rd +++ b/man/get_census_api_2.Rd @@ -16,13 +16,12 @@ get_census_api_2( \item{data_url}{URL root of the API, e.g., \code{"https://api.census.gov/data/2020/dec/pl"}.} -\item{key}{A character string containing a valid Census API key, +\item{key}{A character string containing a valid Census API key, which can be requested from the -[U.S. Census API key signup page](https://api.census.gov/data/key_signup.html). +\href{https://api.census.gov/data/key_signup.html}{U.S. Census API key signup page}. By default, attempts to find a census key stored in an -[environment variable][Sys.getenv] named `CENSUS_API_KEY`.} - +\link[=Sys.getenv]{environment variable} named \code{CENSUS_API_KEY}.} \item{get}{A character vector of variables to get, e.g., \code{c("P2_005N", "P2_006N", "P2_007N", "P2_008N")}. diff --git a/man/get_census_data.Rd b/man/get_census_data.Rd index a3be2b7..e50d53e 100644 --- a/man/get_census_data.Rd +++ b/man/get_census_data.Rd @@ -16,12 +16,12 @@ get_census_data( ) } \arguments{ -\item{key}{A character string containing a valid Census API key, +\item{key}{A character string containing a valid Census API key, which can be requested from the -[U.S. Census API key signup page](https://api.census.gov/data/key_signup.html). +\href{https://api.census.gov/data/key_signup.html}{U.S. Census API key signup page}. By default, attempts to find a census key stored in an -[environment variable][Sys.getenv] named `CENSUS_API_KEY`.} +\link[=Sys.getenv]{environment variable} named \code{CENSUS_API_KEY}.} \item{states}{which states to extract Census data for, e.g., \code{c("NJ", "NY")}.} diff --git a/man/modfuns.Rd b/man/modfuns.Rd index 030f497..d7d682d 100644 --- a/man/modfuns.Rd +++ b/man/modfuns.Rd @@ -75,11 +75,11 @@ predict_race_me( \item{census.geo}{See documentation in \code{race_predict}.} \item{census.key}{A character object specifying user's Census API key. -Required if `census.geo` is specified, because a valid Census API key is +Required if \code{census.geo} is specified, because a valid Census API key is required to download Census geographic data. -If [`NULL`], the default, attempts to find a census key stored in an -[environment variable][Sys.getenv] named `CENSUS_API_KEY`.} +If \code{\link{NULL}}, the default, attempts to find a census key stored in an +\link[=Sys.getenv]{environment variable} named \code{CENSUS_API_KEY}.} \item{census.data}{See documentation in \code{race_predict}.} @@ -95,15 +95,11 @@ If [`NULL`], the default, attempts to find a census key stored in an \item{impute.missing}{See documentation in \code{race_predict}.} - \item{use.counties}{A logical, defaulting to FALSE. Should census data be filtered by counties available in \var{census.data}?} -\item{names.to.use}{One of 'surname', 'surname, first', or 'surname, first, -middle'. Defaults to 'surname'.} +\item{names.to.use}{See documentation in \code{race_predict}.} -\item{race.init}{Vector of initial race for each observation in voter.file. -Must be an integer vector, with 1=white, 2=black, 3=hispanic, 4=asian, and -5=other. Defaults to values obtained using \code{model="BISG_surname"}.} +\item{race.init}{See documentation in \code{race_predict}.} \item{ctrl}{See \code{control} in documentation for \code{\link[=predict_race]{predict_race()}}.} } From 62d22a72f4054ee3d3cb965599ec5fd081dd0417 Mon Sep 17 00:00:00 2001 From: Alexander Rossell Hayes Date: Tue, 28 Nov 2023 13:30:39 -0800 Subject: [PATCH 14/33] feat(predict_race): add support for `census.geo = "zcta"` --- R/predict_race.R | 5 ++++- man/predict_race.Rd | 9 +++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/R/predict_race.R b/R/predict_race.R index 3356c3f..bd8df9a 100644 --- a/R/predict_race.R +++ b/R/predict_race.R @@ -40,6 +40,7 @@ #' must have columns named \code{county}, \code{tract}, and \code{block}. #' If \code{\var{census.geo} = "place"}, then \code{\var{voter.file}} #' must have column named \code{place}. +#' If `census.geo = "zcta"`, then `voter.file` must have column named `zcta`. #' Specifying \code{\var{census.geo}} will call \code{census_helper} function #' to merge Census geographic data at specified level of geography. #' @@ -140,7 +141,7 @@ predict_race <- function( voter.file, census.surname = TRUE, surname.only = FALSE, - census.geo, + census.geo = c("tract", "block", "block_group", "county", "place", "zcta"), census.key = Sys.getenv("CENSUS_API_KEY"), census.data = NULL, age = FALSE, @@ -178,6 +179,8 @@ predict_race <- function( ) } + census.geo <- tolower(census.geo) + census.geo <- rlang::arg_match(census.geo) # block_group is missing, pull from block if((surname.only == FALSE) && !(missing(census.geo)) && (census.geo == "block_group") && !("block_group" %in% names(voter.file))) { diff --git a/man/predict_race.Rd b/man/predict_race.Rd index 27371dc..2ee4ec5 100644 --- a/man/predict_race.Rd +++ b/man/predict_race.Rd @@ -8,7 +8,7 @@ predict_race( voter.file, census.surname = TRUE, surname.only = FALSE, - census.geo, + census.geo = c("tract", "block", "block_group", "county", "place", "zcta"), census.key = Sys.getenv("CENSUS_API_KEY"), census.data = NULL, age = FALSE, @@ -64,15 +64,16 @@ And if \code{\var{census.geo} = "block"}, then \code{\var{voter.file}} must have columns named \code{county}, \code{tract}, and \code{block}. If \code{\var{census.geo} = "place"}, then \code{\var{voter.file}} must have column named \code{place}. +If \code{census.geo = "zcta"}, then \code{voter.file} must have column named \code{zcta}. Specifying \code{\var{census.geo}} will call \code{census_helper} function to merge Census geographic data at specified level of geography.} \item{census.key}{A character object specifying user's Census API key. -Required if `census.geo` is specified, because a valid Census API key is +Required if \code{census.geo} is specified, because a valid Census API key is required to download Census geographic data. -If [`NULL`], the default, attempts to find a census key stored in an -[environment variable][Sys.getenv] named `CENSUS_API_KEY`.} +If \code{\link{NULL}}, the default, attempts to find a census key stored in an +\link[=Sys.getenv]{environment variable} named \code{CENSUS_API_KEY}.} \item{census.data}{A list indexed by two-letter state abbreviations, which contains pre-saved Census geographic data. From 8d5655e5f92736a552ccd941669eed2739946716 Mon Sep 17 00:00:00 2001 From: Alexander Rossell Hayes Date: Tue, 28 Nov 2023 13:31:54 -0800 Subject: [PATCH 15/33] refactor(census_helper_new): use an `else` block to handle `geo == "place"`, `"county"`, and `"zcta"` --- R/census_helper_v2.R | 48 +++++++++++++++----------------------------- 1 file changed, 16 insertions(+), 32 deletions(-) diff --git a/R/census_helper_v2.R b/R/census_helper_v2.R index f246816..5a76fa1 100644 --- a/R/census_helper_v2.R +++ b/R/census_helper_v2.R @@ -100,30 +100,6 @@ census_helper_new <- function( message(paste("State ", s, " of ", length(states), ": ", states[s], sep = "")) state <- toupper(states[s]) - if (geo == "place") { - geo.merge <- c("place") - if ((toDownload) || (is.null(census.data[[state]])) || (census.data[[state]]$year != year) || (census.data[[state]]$age != FALSE) || (census.data[[state]]$sex != FALSE)) { - #} || (census.data[[state]]$age != age) || (census.data[[state]]$sex != sex)) { - if(use.counties) { - census <- census_geo_api(key, state, geo = "place", age, sex, retry) - } else { - census <- census_geo_api(key, state, geo = "place", age, sex, retry) - } - } else { - census <- census.data[[toupper(state)]]$place - } - } - - if (geo == "county") { - geo.merge <- c("county") - if ((toDownload) || (is.null(census.data[[state]])) || (census.data[[state]]$year != year) || (census.data[[state]]$age != FALSE) || (census.data[[state]]$sex != FALSE)) { - #} || (census.data[[state]]$age != age) || (census.data[[state]]$sex != sex)) { - census <- census_geo_api(key, state, geo = "county", age, sex, retry) - } else { - census <- census.data[[toupper(state)]]$county - } - } - if (geo == "tract") { geo.merge <- c("county", "tract") if ((toDownload) || (is.null(census.data[[state]])) || (census.data[[state]]$year != year) || (census.data[[state]]$age != FALSE) || (census.data[[state]]$sex != FALSE)) {#} || (census.data[[state]]$age != age) || (census.data[[state]]$sex != sex)) { @@ -137,9 +113,7 @@ census_helper_new <- function( } else { census <- census.data[[toupper(state)]]$tract } - } - - if (geo == "block_group") { + } else if (geo == "block_group") { geo.merge <- c("county", "tract", "block_group") if ((toDownload) || (is.null(census.data[[state]])) || (census.data[[state]]$year != year) || (census.data[[state]]$age != FALSE) || (census.data[[state]]$sex != FALSE)) {#} || (census.data[[state]]$age != age) || (census.data[[state]]$sex != sex)) { if(use.counties) { @@ -153,10 +127,7 @@ census_helper_new <- function( } else { census <- census.data[[toupper(state)]]$block_group } - } - - - if (geo == "block") { + } else if (geo == "block") { if(any(names(census.data) == "block_group")) { geo.merge <- c("county", "tract", "block_group", "block") } else { @@ -175,10 +146,23 @@ census_helper_new <- function( } else { census <- census.data[[toupper(state)]]$block } + } else { + geo.merge <- geo + + state_must_be_downloaded <- toDownload || + is.null(census.data[[state]]) || + census.data[[state]]$year != year || + census.data[[state]]$age != FALSE || + census.data[[state]]$sex != FALSE + + if (state_must_be_downloaded) { + census <- census_geo_api(key, state, geo = geo, age, sex, retry) + } else { + census <- census.data[[state]][[geo]] + } } census$state <- state - ## Calculate Pr(Geolocation | Race) if (year != "2020") { From cab4a3a96ea7a239933721490de5f47e6b1d1450 Mon Sep 17 00:00:00 2001 From: Alexander Rossell Hayes Date: Wed, 29 Nov 2023 09:48:42 -0800 Subject: [PATCH 16/33] refactor(census_helper_new): improve efficiency of identification of zero-population locations --- R/census_helper_v2.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/census_helper_v2.R b/R/census_helper_v2.R index 5a76fa1..36dd25b 100644 --- a/R/census_helper_v2.R +++ b/R/census_helper_v2.R @@ -192,10 +192,10 @@ census_helper_new <- function( # check locations with zero people # get average without places with zero people, and assign that to zero locs. - if(any((geoPopulations - 0.0) < .Machine$double.eps)){ - zero_ind <- which((geoPopulations - 0.0) < .Machine$double.eps) - for(rcat in c("r_whi","r_bla","r_his","r_asi","r_oth") ){ - census[[rcat]][zero_ind] <- mean(census[[rcat]], na.rm=TRUE) + zero_ind <- which((geoPopulations - 0.0) < .Machine$double.eps) + if (length(zero_ind)) { + for (rcat in c("r_whi","r_bla","r_his","r_asi","r_oth")) { + census[[rcat]][zero_ind] <- mean(census[[rcat]], na.rm = TRUE) } } From 9576ca1bf1f5ddfb01d982c62849ee5468d4a8fd Mon Sep 17 00:00:00 2001 From: Alexander Rossell Hayes Date: Wed, 29 Nov 2023 20:12:25 -0800 Subject: [PATCH 17/33] feat: add `census_geo_api_names()` and `census_geo_api_url()` helper functions --- R/census_geo_api_names.R | 117 ++++++ man/census_geo_api_names.Rd | 48 +++ tests/testthat/test-census_geo_api_names.R | 410 +++++++++++++++++++++ 3 files changed, 575 insertions(+) create mode 100644 R/census_geo_api_names.R create mode 100644 man/census_geo_api_names.Rd create mode 100644 tests/testthat/test-census_geo_api_names.R diff --git a/R/census_geo_api_names.R b/R/census_geo_api_names.R new file mode 100644 index 0000000..dc2aaa0 --- /dev/null +++ b/R/census_geo_api_names.R @@ -0,0 +1,117 @@ +# @staticimports pkg:stringstatic +# str_pad + +#' Census geo API helper functions +#' +#' @inheritParams census_geo_api +#' +#' @return +#' \describe{ +#' \item{`census_geo_api_names()`}{ +#' A named list of [character] vectors whose values correspond to columns +#' of a Census API table and whose names represent the new columns they are +#' used to calculate in [census_geo_api()]. +#' } +#' \item{`census_geo_api_url()`}{ +#' A [character] string containing the base of the URL to a +#' Census API table. +#' } +#' } +#' @keywords internal +census_geo_api_names <- function( + year = c("2020", "2010", "2000"), + age = FALSE, + sex = FALSE +) { + year <- as.character(year) + year <- rlang::arg_match(year) + + assert_boolean(age) + assert_boolean(sex) + + if (year == "2020") { + prefix <- "P12" + separator <- "_" + suffix <- "N" + } else if (year %in% c("2010", "2000")) { + prefix <- "P012" + separator <- "" + suffix <- "" + } + + race_codes <- list( + "_whi" = "I", + "_bla" = "B", + "_his" = "H", + "_asi" = c("D", "E"), + "_oth" = c("C", "F", "G") + ) + + sex_codes <- c("_mal" = 2, "_fem" = 26) + + age_codes <- 1:23 + names(age_codes) <- paste0("_", age_codes) + + numeric_codes <- if (age && sex) { + age_sex_codes <- unlist( + purrr::map(sex_codes, function(x) x + age_codes) + ) + names(age_sex_codes) <- sub(".", "", names(age_sex_codes), fixed = TRUE) + age_sex_codes[] <- str_pad(age_sex_codes, 3, "left", pad = "0") + as.list(age_sex_codes) + } else if (age) { + purrr::map( + age_codes, + function(x) str_pad(x + sex_codes, 3, "left", pad = "0") + ) + } else if (sex) { + sex_codes[] <- str_pad(sex_codes, 3, "left", pad = "0") + as.list(sex_codes) + } + + numeric_codes <- c("001", numeric_codes) + + combinations <- expand.grid( + prefix = prefix, + race_codes = race_codes, + separator = separator, + numeric_codes = numeric_codes, + suffix = suffix, + KEEP.OUT.ATTRS = FALSE, + stringsAsFactors = FALSE + ) + + vars <- purrr::pmap( + combinations, + function(prefix, race_codes, separator, numeric_codes, suffix) { + inner_combinations <- expand.grid( + prefix = prefix, + race_codes = race_codes, + separator = separator, + numeric_codes = numeric_codes, + suffix = suffix, + KEEP.OUT.ATTRS = FALSE, + stringsAsFactors = FALSE + ) + + apply(inner_combinations, 1, paste, collapse = "") + } + ) + + names(vars) <- paste0( + "r", + names(combinations$numeric_codes), + names(combinations$race_codes) + ) + + vars +} + +#' @rdname census_geo_api_names +census_geo_api_url <- function(year = c("2020", "2010", "2000")) { + year <- as.character(year) + year <- rlang::arg_match(year) + + if (year == "2020") return("https://api.census.gov/data/2020/dec/dhc?") + paste0("https://api.census.gov/data/", year, "/dec/sf1?") +} \ No newline at end of file diff --git a/man/census_geo_api_names.Rd b/man/census_geo_api_names.Rd new file mode 100644 index 0000000..a03488c --- /dev/null +++ b/man/census_geo_api_names.Rd @@ -0,0 +1,48 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/census_geo_api_names.R +\name{census_geo_api_names} +\alias{census_geo_api_names} +\alias{census_geo_api_url} +\title{Census geo API helper functions} +\usage{ +census_geo_api_names( + year = c("2020", "2010", "2000"), + age = FALSE, + sex = FALSE +) + +census_geo_api_url(year = c("2020", "2010", "2000")) +} +\arguments{ +\item{year}{A character object specifying the year of U.S. Census data to be downloaded. +Use \code{"2010"}, or \code{"2020"}. Default is \code{"2020"}. +Warning: 2020 U.S. Census data is downloaded only when \code{\var{age}} and +\code{\var{sex}} are both \code{FALSE}.} + +\item{age}{A \code{TRUE}/\code{FALSE} object indicating whether to condition on +age or not. If \code{FALSE} (default), function will return Pr(Geolocation | Race). +If \code{TRUE}, function will return Pr(Geolocation, Age | Race). +If \code{\var{sex}} is also \code{TRUE}, function will return Pr(Geolocation, Age, Sex | Race).} + +\item{sex}{A \code{TRUE}/\code{FALSE} object indicating whether to condition on +sex or not. If \code{FALSE} (default), function will return Pr(Geolocation | Race). +If \code{TRUE}, function will return Pr(Geolocation, Sex | Race). +If \code{\var{age}} is also \code{TRUE}, function will return Pr(Geolocation, Age, Sex | Race).} +} +\value{ +\describe{ +\item{\code{census_geo_api_names()}}{ +A named list of \link{character} vectors whose values correspond to columns +of a Census API table and whose names represent the new columns they are +used to calculate in \code{\link[=census_geo_api]{census_geo_api()}}. +} +\item{\code{census_geo_api_url()}}{ +A \link{character} string containing the base of the URL to a +Census API table. +} +} +} +\description{ +Census geo API helper functions +} +\keyword{internal} diff --git a/tests/testthat/test-census_geo_api_names.R b/tests/testthat/test-census_geo_api_names.R new file mode 100644 index 0000000..2ca9d84 --- /dev/null +++ b/tests/testthat/test-census_geo_api_names.R @@ -0,0 +1,410 @@ +test_that("census_geo_api_names() for 2020", { + expect_equal( + census_geo_api_names("2020"), + list( + r_whi = "P12I_001N", + r_bla = "P12B_001N", + r_his = "P12H_001N", + r_asi = c("P12D_001N", "P12E_001N"), + r_oth = c("P12C_001N", "P12F_001N", "P12G_001N") + ) + ) + + expect_equal( + census_geo_api_names("2020", age = FALSE, sex = TRUE), + c( + census_geo_api_names("2020", age = FALSE, sex = FALSE), + list( + r_mal_whi = "P12I_002N", + r_mal_bla = "P12B_002N", + r_mal_his = "P12H_002N", + r_mal_asi = c("P12D_002N", "P12E_002N"), + r_mal_oth = c("P12C_002N", "P12F_002N", "P12G_002N"), + r_fem_whi = "P12I_026N", + r_fem_bla = "P12B_026N", + r_fem_his = "P12H_026N", + r_fem_asi = c("P12D_026N", "P12E_026N"), + r_fem_oth = c("P12C_026N", "P12F_026N", "P12G_026N") + ) + ) + ) + + expect_equal( + census_geo_api_names("2020", age = TRUE, sex = FALSE), + c( + census_geo_api_names("2020", age = FALSE, sex = FALSE), + list( + r_1_whi = c("P12I_003N", "P12I_027N"), + r_1_bla = c("P12B_003N", "P12B_027N"), + r_1_his = c("P12H_003N", "P12H_027N"), + r_1_asi = c("P12D_003N", "P12E_003N", "P12D_027N", "P12E_027N"), + r_1_oth = c("P12C_003N", "P12F_003N", "P12G_003N", "P12C_027N", "P12F_027N", "P12G_027N"), + r_2_whi = c("P12I_004N", "P12I_028N"), + r_2_bla = c("P12B_004N", "P12B_028N"), + r_2_his = c("P12H_004N", "P12H_028N"), + r_2_asi = c("P12D_004N", "P12E_004N", "P12D_028N", "P12E_028N"), + r_2_oth = c("P12C_004N", "P12F_004N", "P12G_004N", "P12C_028N", "P12F_028N", "P12G_028N"), + r_3_whi = c("P12I_005N", "P12I_029N"), + r_3_bla = c("P12B_005N", "P12B_029N"), + r_3_his = c("P12H_005N", "P12H_029N"), + r_3_asi = c("P12D_005N", "P12E_005N", "P12D_029N", "P12E_029N"), + r_3_oth = c("P12C_005N", "P12F_005N", "P12G_005N", "P12C_029N", "P12F_029N", "P12G_029N"), + r_4_whi = c("P12I_006N", "P12I_030N"), + r_4_bla = c("P12B_006N", "P12B_030N"), + r_4_his = c("P12H_006N", "P12H_030N"), + r_4_asi = c("P12D_006N", "P12E_006N", "P12D_030N", "P12E_030N"), + r_4_oth = c("P12C_006N", "P12F_006N", "P12G_006N", "P12C_030N", "P12F_030N", "P12G_030N"), + r_5_whi = c("P12I_007N", "P12I_031N"), + r_5_bla = c("P12B_007N", "P12B_031N"), + r_5_his = c("P12H_007N", "P12H_031N"), + r_5_asi = c("P12D_007N", "P12E_007N", "P12D_031N", "P12E_031N"), + r_5_oth = c("P12C_007N", "P12F_007N", "P12G_007N", "P12C_031N", "P12F_031N", "P12G_031N"), + r_6_whi = c("P12I_008N", "P12I_032N"), + r_6_bla = c("P12B_008N", "P12B_032N"), + r_6_his = c("P12H_008N", "P12H_032N"), + r_6_asi = c("P12D_008N", "P12E_008N", "P12D_032N", "P12E_032N"), + r_6_oth = c("P12C_008N", "P12F_008N", "P12G_008N", "P12C_032N", "P12F_032N", "P12G_032N"), + r_7_whi = c("P12I_009N", "P12I_033N"), + r_7_bla = c("P12B_009N", "P12B_033N"), + r_7_his = c("P12H_009N", "P12H_033N"), + r_7_asi = c("P12D_009N", "P12E_009N", "P12D_033N", "P12E_033N"), + r_7_oth = c("P12C_009N", "P12F_009N", "P12G_009N", "P12C_033N", "P12F_033N", "P12G_033N"), + r_8_whi = c("P12I_010N", "P12I_034N"), + r_8_bla = c("P12B_010N", "P12B_034N"), + r_8_his = c("P12H_010N", "P12H_034N"), + r_8_asi = c("P12D_010N", "P12E_010N", "P12D_034N", "P12E_034N"), + r_8_oth = c("P12C_010N", "P12F_010N", "P12G_010N", "P12C_034N", "P12F_034N", "P12G_034N"), + r_9_whi = c("P12I_011N", "P12I_035N"), + r_9_bla = c("P12B_011N", "P12B_035N"), + r_9_his = c("P12H_011N", "P12H_035N"), + r_9_asi = c("P12D_011N", "P12E_011N", "P12D_035N", "P12E_035N"), + r_9_oth = c("P12C_011N", "P12F_011N", "P12G_011N", "P12C_035N", "P12F_035N", "P12G_035N"), + r_10_whi = c("P12I_012N", "P12I_036N"), + r_10_bla = c("P12B_012N", "P12B_036N"), + r_10_his = c("P12H_012N", "P12H_036N"), + r_10_asi = c("P12D_012N", "P12E_012N", "P12D_036N", "P12E_036N"), + r_10_oth = c("P12C_012N", "P12F_012N", "P12G_012N", "P12C_036N", "P12F_036N", "P12G_036N"), + r_11_whi = c("P12I_013N", "P12I_037N"), + r_11_bla = c("P12B_013N", "P12B_037N"), + r_11_his = c("P12H_013N", "P12H_037N"), + r_11_asi = c("P12D_013N", "P12E_013N", "P12D_037N", "P12E_037N"), + r_11_oth = c("P12C_013N", "P12F_013N", "P12G_013N", "P12C_037N", "P12F_037N", "P12G_037N"), + r_12_whi = c("P12I_014N", "P12I_038N"), + r_12_bla = c("P12B_014N", "P12B_038N"), + r_12_his = c("P12H_014N", "P12H_038N"), + r_12_asi = c("P12D_014N", "P12E_014N", "P12D_038N", "P12E_038N"), + r_12_oth = c("P12C_014N", "P12F_014N", "P12G_014N", "P12C_038N", "P12F_038N", "P12G_038N"), + r_13_whi = c("P12I_015N", "P12I_039N"), + r_13_bla = c("P12B_015N", "P12B_039N"), + r_13_his = c("P12H_015N", "P12H_039N"), + r_13_asi = c("P12D_015N", "P12E_015N", "P12D_039N", "P12E_039N"), + r_13_oth = c("P12C_015N", "P12F_015N", "P12G_015N", "P12C_039N", "P12F_039N", "P12G_039N"), + r_14_whi = c("P12I_016N", "P12I_040N"), + r_14_bla = c("P12B_016N", "P12B_040N"), + r_14_his = c("P12H_016N", "P12H_040N"), + r_14_asi = c("P12D_016N", "P12E_016N", "P12D_040N", "P12E_040N"), + r_14_oth = c("P12C_016N", "P12F_016N", "P12G_016N", "P12C_040N", "P12F_040N", "P12G_040N"), + r_15_whi = c("P12I_017N", "P12I_041N"), + r_15_bla = c("P12B_017N", "P12B_041N"), + r_15_his = c("P12H_017N", "P12H_041N"), + r_15_asi = c("P12D_017N", "P12E_017N", "P12D_041N", "P12E_041N"), + r_15_oth = c("P12C_017N", "P12F_017N", "P12G_017N", "P12C_041N", "P12F_041N", "P12G_041N"), + r_16_whi = c("P12I_018N", "P12I_042N"), + r_16_bla = c("P12B_018N", "P12B_042N"), + r_16_his = c("P12H_018N", "P12H_042N"), + r_16_asi = c("P12D_018N", "P12E_018N", "P12D_042N", "P12E_042N"), + r_16_oth = c("P12C_018N", "P12F_018N", "P12G_018N", "P12C_042N", "P12F_042N", "P12G_042N"), + r_17_whi = c("P12I_019N", "P12I_043N"), + r_17_bla = c("P12B_019N", "P12B_043N"), + r_17_his = c("P12H_019N", "P12H_043N"), + r_17_asi = c("P12D_019N", "P12E_019N", "P12D_043N", "P12E_043N"), + r_17_oth = c("P12C_019N", "P12F_019N", "P12G_019N", "P12C_043N", "P12F_043N", "P12G_043N"), + r_18_whi = c("P12I_020N", "P12I_044N"), + r_18_bla = c("P12B_020N", "P12B_044N"), + r_18_his = c("P12H_020N", "P12H_044N"), + r_18_asi = c("P12D_020N", "P12E_020N", "P12D_044N", "P12E_044N"), + r_18_oth = c("P12C_020N", "P12F_020N", "P12G_020N", "P12C_044N", "P12F_044N", "P12G_044N"), + r_19_whi = c("P12I_021N", "P12I_045N"), + r_19_bla = c("P12B_021N", "P12B_045N"), + r_19_his = c("P12H_021N", "P12H_045N"), + r_19_asi = c("P12D_021N", "P12E_021N", "P12D_045N", "P12E_045N"), + r_19_oth = c("P12C_021N", "P12F_021N", "P12G_021N", "P12C_045N", "P12F_045N", "P12G_045N"), + r_20_whi = c("P12I_022N", "P12I_046N"), + r_20_bla = c("P12B_022N", "P12B_046N"), + r_20_his = c("P12H_022N", "P12H_046N"), + r_20_asi = c("P12D_022N", "P12E_022N", "P12D_046N", "P12E_046N"), + r_20_oth = c("P12C_022N", "P12F_022N", "P12G_022N", "P12C_046N", "P12F_046N", "P12G_046N"), + r_21_whi = c("P12I_023N", "P12I_047N"), + r_21_bla = c("P12B_023N", "P12B_047N"), + r_21_his = c("P12H_023N", "P12H_047N"), + r_21_asi = c("P12D_023N", "P12E_023N", "P12D_047N", "P12E_047N"), + r_21_oth = c("P12C_023N", "P12F_023N", "P12G_023N", "P12C_047N", "P12F_047N", "P12G_047N"), + r_22_whi = c("P12I_024N", "P12I_048N"), + r_22_bla = c("P12B_024N", "P12B_048N"), + r_22_his = c("P12H_024N", "P12H_048N"), + r_22_asi = c("P12D_024N", "P12E_024N", "P12D_048N", "P12E_048N"), + r_22_oth = c("P12C_024N", "P12F_024N", "P12G_024N", "P12C_048N", "P12F_048N", "P12G_048N"), + r_23_whi = c("P12I_025N", "P12I_049N"), + r_23_bla = c("P12B_025N", "P12B_049N"), + r_23_his = c("P12H_025N", "P12H_049N"), + r_23_asi = c("P12D_025N", "P12E_025N", "P12D_049N", "P12E_049N"), + r_23_oth = c("P12C_025N", "P12F_025N", "P12G_025N", "P12C_049N", "P12F_049N", "P12G_049N") + ) + ) + ) + + expect_equal( + census_geo_api_names("2020", age = TRUE, sex = TRUE), + c( + census_geo_api_names("2020", age = FALSE, sex = FALSE), + list( + r_mal_1_whi = "P12I_003N", + r_mal_1_bla = "P12B_003N", + r_mal_1_his = "P12H_003N", + r_mal_1_asi = c("P12D_003N", "P12E_003N"), + r_mal_1_oth = c("P12C_003N", "P12F_003N", "P12G_003N"), + r_mal_2_whi = "P12I_004N", + r_mal_2_bla = "P12B_004N", + r_mal_2_his = "P12H_004N", + r_mal_2_asi = c("P12D_004N", "P12E_004N"), + r_mal_2_oth = c("P12C_004N", "P12F_004N", "P12G_004N"), + r_mal_3_whi = "P12I_005N", + r_mal_3_bla = "P12B_005N", + r_mal_3_his = "P12H_005N", + r_mal_3_asi = c("P12D_005N", "P12E_005N"), + r_mal_3_oth = c("P12C_005N", "P12F_005N", "P12G_005N"), + r_mal_4_whi = "P12I_006N", + r_mal_4_bla = "P12B_006N", + r_mal_4_his = "P12H_006N", + r_mal_4_asi = c("P12D_006N", "P12E_006N"), + r_mal_4_oth = c("P12C_006N", "P12F_006N", "P12G_006N"), + r_mal_5_whi = "P12I_007N", + r_mal_5_bla = "P12B_007N", + r_mal_5_his = "P12H_007N", + r_mal_5_asi = c("P12D_007N", "P12E_007N"), + r_mal_5_oth = c("P12C_007N", "P12F_007N", "P12G_007N"), + r_mal_6_whi = "P12I_008N", + r_mal_6_bla = "P12B_008N", + r_mal_6_his = "P12H_008N", + r_mal_6_asi = c("P12D_008N", "P12E_008N"), + r_mal_6_oth = c("P12C_008N", "P12F_008N", "P12G_008N"), + r_mal_7_whi = "P12I_009N", + r_mal_7_bla = "P12B_009N", + r_mal_7_his = "P12H_009N", + r_mal_7_asi = c("P12D_009N", "P12E_009N"), + r_mal_7_oth = c("P12C_009N", "P12F_009N", "P12G_009N"), + r_mal_8_whi = "P12I_010N", + r_mal_8_bla = "P12B_010N", + r_mal_8_his = "P12H_010N", + r_mal_8_asi = c("P12D_010N", "P12E_010N"), + r_mal_8_oth = c("P12C_010N", "P12F_010N", "P12G_010N"), + r_mal_9_whi = "P12I_011N", + r_mal_9_bla = "P12B_011N", + r_mal_9_his = "P12H_011N", + r_mal_9_asi = c("P12D_011N", "P12E_011N"), + r_mal_9_oth = c("P12C_011N", "P12F_011N", "P12G_011N"), + r_mal_10_whi = "P12I_012N", + r_mal_10_bla = "P12B_012N", + r_mal_10_his = "P12H_012N", + r_mal_10_asi = c("P12D_012N", "P12E_012N"), + r_mal_10_oth = c("P12C_012N", "P12F_012N", "P12G_012N"), + r_mal_11_whi = "P12I_013N", + r_mal_11_bla = "P12B_013N", + r_mal_11_his = "P12H_013N", + r_mal_11_asi = c("P12D_013N", "P12E_013N"), + r_mal_11_oth = c("P12C_013N", "P12F_013N", "P12G_013N"), + r_mal_12_whi = "P12I_014N", + r_mal_12_bla = "P12B_014N", + r_mal_12_his = "P12H_014N", + r_mal_12_asi = c("P12D_014N", "P12E_014N"), + r_mal_12_oth = c("P12C_014N", "P12F_014N", "P12G_014N"), + r_mal_13_whi = "P12I_015N", + r_mal_13_bla = "P12B_015N", + r_mal_13_his = "P12H_015N", + r_mal_13_asi = c("P12D_015N", "P12E_015N"), + r_mal_13_oth = c("P12C_015N", "P12F_015N", "P12G_015N"), + r_mal_14_whi = "P12I_016N", + r_mal_14_bla = "P12B_016N", + r_mal_14_his = "P12H_016N", + r_mal_14_asi = c("P12D_016N", "P12E_016N"), + r_mal_14_oth = c("P12C_016N", "P12F_016N", "P12G_016N"), + r_mal_15_whi = "P12I_017N", + r_mal_15_bla = "P12B_017N", + r_mal_15_his = "P12H_017N", + r_mal_15_asi = c("P12D_017N", "P12E_017N"), + r_mal_15_oth = c("P12C_017N", "P12F_017N", "P12G_017N"), + r_mal_16_whi = "P12I_018N", + r_mal_16_bla = "P12B_018N", + r_mal_16_his = "P12H_018N", + r_mal_16_asi = c("P12D_018N", "P12E_018N"), + r_mal_16_oth = c("P12C_018N", "P12F_018N", "P12G_018N"), + r_mal_17_whi = "P12I_019N", + r_mal_17_bla = "P12B_019N", + r_mal_17_his = "P12H_019N", + r_mal_17_asi = c("P12D_019N", "P12E_019N"), + r_mal_17_oth = c("P12C_019N", "P12F_019N", "P12G_019N"), + r_mal_18_whi = "P12I_020N", + r_mal_18_bla = "P12B_020N", + r_mal_18_his = "P12H_020N", + r_mal_18_asi = c("P12D_020N", "P12E_020N"), + r_mal_18_oth = c("P12C_020N", "P12F_020N", "P12G_020N"), + r_mal_19_whi = "P12I_021N", + r_mal_19_bla = "P12B_021N", + r_mal_19_his = "P12H_021N", + r_mal_19_asi = c("P12D_021N", "P12E_021N"), + r_mal_19_oth = c("P12C_021N", "P12F_021N", "P12G_021N"), + r_mal_20_whi = "P12I_022N", + r_mal_20_bla = "P12B_022N", + r_mal_20_his = "P12H_022N", + r_mal_20_asi = c("P12D_022N", "P12E_022N"), + r_mal_20_oth = c("P12C_022N", "P12F_022N", "P12G_022N"), + r_mal_21_whi = "P12I_023N", + r_mal_21_bla = "P12B_023N", + r_mal_21_his = "P12H_023N", + r_mal_21_asi = c("P12D_023N", "P12E_023N"), + r_mal_21_oth = c("P12C_023N", "P12F_023N", "P12G_023N"), + r_mal_22_whi = "P12I_024N", + r_mal_22_bla = "P12B_024N", + r_mal_22_his = "P12H_024N", + r_mal_22_asi = c("P12D_024N", "P12E_024N"), + r_mal_22_oth = c("P12C_024N", "P12F_024N", "P12G_024N"), + r_mal_23_whi = "P12I_025N", + r_mal_23_bla = "P12B_025N", + r_mal_23_his = "P12H_025N", + r_mal_23_asi = c("P12D_025N", "P12E_025N"), + r_mal_23_oth = c("P12C_025N", "P12F_025N", "P12G_025N"), + r_fem_1_whi = "P12I_027N", + r_fem_1_bla = "P12B_027N", + r_fem_1_his = "P12H_027N", + r_fem_1_asi = c("P12D_027N", "P12E_027N"), + r_fem_1_oth = c("P12C_027N", "P12F_027N", "P12G_027N"), + r_fem_2_whi = "P12I_028N", + r_fem_2_bla = "P12B_028N", + r_fem_2_his = "P12H_028N", + r_fem_2_asi = c("P12D_028N", "P12E_028N"), + r_fem_2_oth = c("P12C_028N", "P12F_028N", "P12G_028N"), + r_fem_3_whi = "P12I_029N", + r_fem_3_bla = "P12B_029N", + r_fem_3_his = "P12H_029N", + r_fem_3_asi = c("P12D_029N", "P12E_029N"), + r_fem_3_oth = c("P12C_029N", "P12F_029N", "P12G_029N"), + r_fem_4_whi = "P12I_030N", + r_fem_4_bla = "P12B_030N", + r_fem_4_his = "P12H_030N", + r_fem_4_asi = c("P12D_030N", "P12E_030N"), + r_fem_4_oth = c("P12C_030N", "P12F_030N", "P12G_030N"), + r_fem_5_whi = "P12I_031N", + r_fem_5_bla = "P12B_031N", + r_fem_5_his = "P12H_031N", + r_fem_5_asi = c("P12D_031N", "P12E_031N"), + r_fem_5_oth = c("P12C_031N", "P12F_031N", "P12G_031N"), + r_fem_6_whi = "P12I_032N", + r_fem_6_bla = "P12B_032N", + r_fem_6_his = "P12H_032N", + r_fem_6_asi = c("P12D_032N", "P12E_032N"), + r_fem_6_oth = c("P12C_032N", "P12F_032N", "P12G_032N"), + r_fem_7_whi = "P12I_033N", + r_fem_7_bla = "P12B_033N", + r_fem_7_his = "P12H_033N", + r_fem_7_asi = c("P12D_033N", "P12E_033N"), + r_fem_7_oth = c("P12C_033N", "P12F_033N", "P12G_033N"), + r_fem_8_whi = "P12I_034N", + r_fem_8_bla = "P12B_034N", + r_fem_8_his = "P12H_034N", + r_fem_8_asi = c("P12D_034N", "P12E_034N"), + r_fem_8_oth = c("P12C_034N", "P12F_034N", "P12G_034N"), + r_fem_9_whi = "P12I_035N", + r_fem_9_bla = "P12B_035N", + r_fem_9_his = "P12H_035N", + r_fem_9_asi = c("P12D_035N", "P12E_035N"), + r_fem_9_oth = c("P12C_035N", "P12F_035N", "P12G_035N"), + r_fem_10_whi = "P12I_036N", + r_fem_10_bla = "P12B_036N", + r_fem_10_his = "P12H_036N", + r_fem_10_asi = c("P12D_036N", "P12E_036N"), + r_fem_10_oth = c("P12C_036N", "P12F_036N", "P12G_036N"), + r_fem_11_whi = "P12I_037N", + r_fem_11_bla = "P12B_037N", + r_fem_11_his = "P12H_037N", + r_fem_11_asi = c("P12D_037N", "P12E_037N"), + r_fem_11_oth = c("P12C_037N", "P12F_037N", "P12G_037N"), + r_fem_12_whi = "P12I_038N", + r_fem_12_bla = "P12B_038N", + r_fem_12_his = "P12H_038N", + r_fem_12_asi = c("P12D_038N", "P12E_038N"), + r_fem_12_oth = c("P12C_038N", "P12F_038N", "P12G_038N"), + r_fem_13_whi = "P12I_039N", + r_fem_13_bla = "P12B_039N", + r_fem_13_his = "P12H_039N", + r_fem_13_asi = c("P12D_039N", "P12E_039N"), + r_fem_13_oth = c("P12C_039N", "P12F_039N", "P12G_039N"), + r_fem_14_whi = "P12I_040N", + r_fem_14_bla = "P12B_040N", + r_fem_14_his = "P12H_040N", + r_fem_14_asi = c("P12D_040N", "P12E_040N"), + r_fem_14_oth = c("P12C_040N", "P12F_040N", "P12G_040N"), + r_fem_15_whi = "P12I_041N", + r_fem_15_bla = "P12B_041N", + r_fem_15_his = "P12H_041N", + r_fem_15_asi = c("P12D_041N", "P12E_041N"), + r_fem_15_oth = c("P12C_041N", "P12F_041N", "P12G_041N"), + r_fem_16_whi = "P12I_042N", + r_fem_16_bla = "P12B_042N", + r_fem_16_his = "P12H_042N", + r_fem_16_asi = c("P12D_042N", "P12E_042N"), + r_fem_16_oth = c("P12C_042N", "P12F_042N", "P12G_042N"), + r_fem_17_whi = "P12I_043N", + r_fem_17_bla = "P12B_043N", + r_fem_17_his = "P12H_043N", + r_fem_17_asi = c("P12D_043N", "P12E_043N"), + r_fem_17_oth = c("P12C_043N", "P12F_043N", "P12G_043N"), + r_fem_18_whi = "P12I_044N", + r_fem_18_bla = "P12B_044N", + r_fem_18_his = "P12H_044N", + r_fem_18_asi = c("P12D_044N", "P12E_044N"), + r_fem_18_oth = c("P12C_044N", "P12F_044N", "P12G_044N"), + r_fem_19_whi = "P12I_045N", + r_fem_19_bla = "P12B_045N", + r_fem_19_his = "P12H_045N", + r_fem_19_asi = c("P12D_045N", "P12E_045N"), + r_fem_19_oth = c("P12C_045N", "P12F_045N", "P12G_045N"), + r_fem_20_whi = "P12I_046N", + r_fem_20_bla = "P12B_046N", + r_fem_20_his = "P12H_046N", + r_fem_20_asi = c("P12D_046N", "P12E_046N"), + r_fem_20_oth = c("P12C_046N", "P12F_046N", "P12G_046N"), + r_fem_21_whi = "P12I_047N", + r_fem_21_bla = "P12B_047N", + r_fem_21_his = "P12H_047N", + r_fem_21_asi = c("P12D_047N", "P12E_047N"), + r_fem_21_oth = c("P12C_047N", "P12F_047N", "P12G_047N"), + r_fem_22_whi = "P12I_048N", + r_fem_22_bla = "P12B_048N", + r_fem_22_his = "P12H_048N", + r_fem_22_asi = c("P12D_048N", "P12E_048N"), + r_fem_22_oth = c("P12C_048N", "P12F_048N", "P12G_048N"), + r_fem_23_whi = "P12I_049N", + r_fem_23_bla = "P12B_049N", + r_fem_23_his = "P12H_049N", + r_fem_23_asi = c("P12D_049N", "P12E_049N"), + r_fem_23_oth = c("P12C_049N", "P12F_049N", "P12G_049N") + ) + ) + ) +}) + +test_that("census_geo_api_url()", { + expect_equal( + census_geo_api_url("2020"), + "https://api.census.gov/data/2020/dec/dhc?" + ) + expect_equal( + census_geo_api_url("2010"), + "https://api.census.gov/data/2010/dec/sf1?" + ) + expect_equal( + census_geo_api_url("2000"), + "https://api.census.gov/data/2000/dec/sf1?" + ) + expect_error(census_geo_api_url("2023")) +}) From 984b6ce9c5a3fe353a2d6e0b3df4ceda350506f7 Mon Sep 17 00:00:00 2001 From: Alexander Rossell Hayes Date: Wed, 29 Nov 2023 20:12:25 -0800 Subject: [PATCH 18/33] refactor(census_geo_api_zcta): use `census_geo_api_names()` --- R/census_geo_api_zcta.R | 152 +++++----------------------------------- 1 file changed, 19 insertions(+), 133 deletions(-) diff --git a/R/census_geo_api_zcta.R b/R/census_geo_api_zcta.R index 8ccdc5d..93fb0b0 100644 --- a/R/census_geo_api_zcta.R +++ b/R/census_geo_api_zcta.R @@ -1,6 +1,3 @@ -# @staticimports pkg:stringstatic -# str_pad - #' Census download function for state-ZCTA-level data #' #' @inheritParams census_geo_api @@ -32,48 +29,8 @@ census_geo_api_zcta <- function( year <- as.character(year) year <- rlang::arg_match(year) - if (year == "2020") { - census_data_url <- "https://api.census.gov/data/2020/dec/dhc?" - prefix <- "P12" - separator <- "_" - suffix <- "N" - } else if (year %in% c("2010", "2000")) { - census_data_url <- paste0("https://api.census.gov/data/", year, "/dec/sf1?") - prefix <- "P012" - separator <- "" - suffix <- "" - } - - race_codes <- list( - "whi" = "I", - "bla" = "B", - "his" = "H", - "asi" = c("D", "E"), - "oth" = c("C", "F", "G") - ) - - sex_codes <- c("mal" = 2, "fem" = 26) - - age_codes <- 1:23 - - numeric_codes <- if (age) { - unlist(purrr::map(sex_codes, function(x) x + age_codes)) - } else if (sex) { - sex_codes - } else { - 1 - } - numeric_codes <- str_pad(numeric_codes, width = 3, side = "left", pad = "0") - - vars <- expand.grid( - prefix, - unlist(race_codes), - separator, - numeric_codes, - suffix, - stringsAsFactors = FALSE - ) - vars <- apply(vars, 1, paste, collapse = "") + census_data_url <- census_geo_api_url(year = year) + vars <- census_geo_api_names(year = year, age = age, sex = sex) region <- paste0( "for=zip%20code%20tabulation%20area%20(or%20part):*&in=state:", @@ -83,104 +40,33 @@ census_geo_api_zcta <- function( census <- get_census_api( census_data_url, key = key, - var.names = vars, + var.names = unlist(vars), region = region, retry ) - if (!age && !sex) { - ## Calculate Pr(Geolocation | Race) - - for (i in seq_along(race_codes)) { - var_name <- paste("r", names(race_codes)[[i]], sep = "_") - - code <- paste0(prefix, race_codes[[i]], separator, "001", suffix) - - census[var_name] <- rowSums(census[code]) - } - } else if (!age && sex) { - ## Calculate Pr(Geolocation, Sex | Race) - - for (race in seq_along(race_codes)) { - for (sex in seq_along(sex_codes)) { - var_name <- paste( - "r", - names(sex_codes)[[sex]], - names(race_codes)[[race]], - sep = "_" - ) - - code <- paste0( - prefix, - race_codes[[race]], - separator, - str_pad(sex_codes[[sex]], width = 3, pad = "0"), - suffix - ) - - census[var_name] <- rowSums(census[code]) - } - } - } else if (age && !sex) { - ## Calculate Pr(Geolocation, Age Category | Race) - - for (race in seq_along(race_codes)) { - for (age_category in age_codes) { - var_name <- paste( - "r", - age_category, - names(race_codes)[[race]], - sep = "_" - ) - - code <- paste0( - prefix, - race_codes[[race]], - separator, - str_pad(sex_codes + age_category, width = 3, pad = "0"), - suffix - ) - - census[var_name] <- rowSums(census[code]) - } - } - } else if (age && sex) { - ## Calculate Pr(Geolocation, Sex, Age Category | Race) - - for (race in seq_along(race_codes)) { - for (age_category in age_codes) { - for (sex in seq_along(sex_codes)) { - var_name <- paste( - "r", - names(sex_codes)[[sex]], - age_category, - names(race_codes)[[race]], - sep = "_" - ) - - code <- paste0( - prefix, - race_codes[[race]], - separator, - str_pad(sex_codes[[sex]] + age_category, width = 3, pad = "0"), - suffix - ) - - census[var_name] <- rowSums(census[code]) - } - } - } - } + census <- dplyr::mutate(census, state = as_state_abbreviation(state)) + names(census)[[2]] <- "zcta" + + r_columns <- purrr::map(vars, function(vars) rowSums(census[vars])) + census <- dplyr::bind_cols(census, r_columns) census <- dplyr::group_by(census, dplyr::across(dplyr::any_of("state"))) census <- dplyr::mutate( census, - state = as_state_abbreviation(state), - dplyr::across(dplyr::starts_with("r_"), function(x) x / sum(x)) + dplyr::across( + # Divide all r_columns by the total population of the corresponding race + dplyr::all_of(names(r_columns)), + function(x) { + x / sum( + dplyr::pick( + sub("^.+_(.{3})$", "r_\\1", dplyr::cur_column(), perl = TRUE) + ) + ) + } + ) ) census <- dplyr::ungroup(census) - names(census)[[2]] <- "zcta" - census } From 0fa0953a2c4b9a5684783298c0169b143ca66e1c Mon Sep 17 00:00:00 2001 From: Alexander Rossell Hayes Date: Thu, 30 Nov 2023 15:46:38 -0800 Subject: [PATCH 19/33] refactor(census_geo_api): use `census_geo_api_names()` --- R/census_geo_api.R | 182 ++++++++------------------------------------- 1 file changed, 31 insertions(+), 151 deletions(-) diff --git a/R/census_geo_api.R b/R/census_geo_api.R index 1c64d22..d4055e4 100644 --- a/R/census_geo_api.R +++ b/R/census_geo_api.R @@ -80,68 +80,18 @@ census_geo_api <- function( year <- rlang::arg_match(year) census <- NULL - state <- toupper(state) + state <- as_state_abbreviation(state) df.out <- NULL state.fips <- as_fips_code(state) - # if (age == F & sex == F) { - # num <- ifelse(3:10 != 10, paste("0", 3:10, sep = ""), "10") - # vars <- paste("P0050", num, sep = "") - # } - - # assign variable values based on the year of the census data - if (as.character(year) != "2020"){ - vars_ <- c( - pop_white = 'P005003', pop_black = 'P005004', - pop_aian = 'P005005', pop_asian = 'P005006', - pop_nhpi = 'P005007', pop_other = 'P005008', - pop_two = 'P005009', pop_hisp = 'P005010' - ) - } else { - vars_ <- c( - pop_white = 'P2_005N', pop_black = 'P2_006N', - pop_aian = 'P2_007N', pop_asian = 'P2_008N', - pop_nhpi = 'P2_009N', pop_other = 'P2_010N', - pop_two = 'P2_011N', pop_hisp = 'P2_002N' - ) - } - - if (age == F & sex == T) { - eth.let <- c("I", "B", "H", "D", "E", "F", "C") - num <- as.character(c("01", "02", "26")) - for (e in 1:length(eth.let)) { - vars_ <- c(vars_, paste("P012", eth.let[e], "0", num, sep = "")) - } - } - - if (age == T & sex == F) { - eth.let <- c("I", "B", "H", "D", "E", "F", "C") - num <- as.character(c(c("01", "03", "04", "05", "06", "07", "08", "09"), seq(10, 25), seq(27, 49))) - for (e in 1:length(eth.let)) { - vars_ <- c(vars_, paste("P012", eth.let[e], "0", num, sep = "")) - } - } - - if (age == T & sex == T) { - eth.let <- c("I", "B", "H", "D", "E", "F", "C") - num <- as.character(c(c("01", "03", "04", "05", "06", "07", "08", "09"), seq(10, 25), seq(27, 49))) - for (e in 1:length(eth.let)) { - vars_ <- c(vars_, paste("P012", eth.let[e], "0", num, sep = "")) - } - } - - # set the census data url links - census_data_url <- switch( - as.character(year), - "2010" = "https://api.census.gov/data/2010/dec/sf1?", - "2020" = "https://api.census.gov/data/2020/dec/pl?" - ) + vars <- census_geo_api_names(year = year, age = age, sex = sex) + census_data_url <- census_geo_api_url(year = year) if (geo == "place") { geo.merge <- c("state", "place") region <- paste("for=place:*&in=state:", state.fips, sep = "") - census <- get_census_api(census_data_url, key = key, var.names = vars_, region = region, retry) + census <- get_census_api(census_data_url, key = key, var.names = unlist(vars), region = region, retry) } if (geo == "county") { @@ -154,7 +104,7 @@ census_geo_api <- function( region <- paste("for=county:",counties_paste,"&in=state:", state.fips, sep = "") } - census <- get_census_api(census_data_url, key = key, var.names = vars_, region = region, retry) + census <- get_census_api(census_data_url, key = key, var.names = unlist(vars), region = region, retry) } if (geo == "tract") { @@ -168,7 +118,7 @@ census_geo_api <- function( region_county <- paste("for=county:",counties_paste,"&in=state:", state.fips, sep = "") } - county_df <- get_census_api(census_data_url, key = key, var.names = vars_, region = region_county, retry) + county_df <- get_census_api(census_data_url, key = key, var.names = unlist(vars), region = region_county, retry) if(is.null(counties)) { county_list <- county_df$county @@ -180,7 +130,7 @@ census_geo_api <- function( census_tracts <- furrr::future_map_dfr(seq_along(county_list), function(county) { message(paste("County ", county, " of ", length(county_list), ": ", county_list[county], sep = "")) region_county <- paste("for=tract:*&in=state:", state.fips, "+county:", county_list[county], sep = "") - get_census_api(data_url = census_data_url, key = key, var.names = vars_, region = region_county, retry) + get_census_api(data_url = census_data_url, key = key, var.names = unlist(vars), region = region_county, retry) }) census <- rbind(census, census_tracts) @@ -200,7 +150,7 @@ census_geo_api <- function( region_county <- paste("for=county:",counties_paste,"&in=state:", state.fips, sep = "") } - county_df <- get_census_api(census_data_url, key = key, var.names = vars_, region = region_county, retry) + county_df <- get_census_api(census_data_url, key = key, var.names = unlist(vars), region = region_county, retry) if(is.null(counties)) { county_list <- county_df$county @@ -220,7 +170,7 @@ census_geo_api <- function( blockgroup <- paste("for=block+group:*&in=state:", state.fips, "+county:", county_list[county], sep = "") # message(region_tract) - blockgroup_df <- get_census_api(census_data_url, key = key, var.names = vars_, region = blockgroup, retry) + blockgroup_df <- get_census_api(census_data_url, key = key, var.names = unlist(vars), region = blockgroup, retry) names(blockgroup_df)[4] <- "block_group" # Fix name, it comes in with a space from api. blockgroup_df } @@ -245,7 +195,7 @@ census_geo_api <- function( region_county <- paste("for=county:",counties_paste,"&in=state:", state.fips, sep = "") } - county_df <- get_census_api(census_data_url, key = key, var.names = vars_, region = region_county, retry) + county_df <- get_census_api(census_data_url, key = key, var.names = unlist(vars), region = region_county, retry) if(is.null(counties)) { county_list <- county_df$county @@ -264,14 +214,14 @@ census_geo_api <- function( region_tract <- paste("for=tract:*&in=state:", state.fips, "+county:", county_list[county], sep = "") # message(region_tract) - tract_df <- get_census_api(census_data_url, key = key, var.names = vars_, region = region_tract, retry) + tract_df <- get_census_api(census_data_url, key = key, var.names = unlist(vars), region = region_tract, retry) tract_list <- tract_df$tract furrr::future_map_dfr(1:length(tract_list), function(tract) { message(paste("Tract ", tract, " of ", length(tract_list), ": ", tract_list[tract], sep = "")) region_block <- paste("for=block:*&in=state:", state.fips, "+county:", county_list[county], "+tract:", tract_list[tract], sep = "") - get_census_api(census_data_url, key = key, var.names = vars_, region = region_block, retry) + get_census_api(census_data_url, key = key, var.names = unlist(vars), region = region_block, retry) }) } ) @@ -284,97 +234,27 @@ census_geo_api <- function( } } - census$state <- state + census <- dplyr::mutate(census, state = as_state_abbreviation(state)) - if (age == F & sex == F) { - - ## Calculate Pr(Geolocation | Race) - census$r_whi <- census[, vars_["pop_white"]] / sum(census[, vars_["pop_white"]]) #Pr(Geo|White) - census$r_bla <- census[, vars_["pop_black"]] / sum(census[, vars_["pop_black"]]) #Pr(Geo|Black) - census$r_his <- census[, vars_["pop_hisp"]] / sum(census[, vars_["pop_hisp"]]) #Pr(Geo|Latino) - census$r_asi <- (census[, vars_["pop_asian"]] + census[, vars_["pop_nhpi"]]) / (sum(census[, vars_["pop_asian"]]) + sum(census[, vars_["pop_nhpi"]])) #Pr(Geo | Asian or NH/PI) - census$r_oth <- (census[, vars_["pop_aian"]] + census[, vars_["pop_other"]] + census[, vars_["pop_two"]]) / (sum(census[, vars_["pop_aian"]]) + sum(census[, vars_["pop_other"]]) + sum(census[, vars_["pop_two"]])) #Pr(Geo | AI/AN, Other, or Mixed) - - } + r_columns <- purrr::map(vars, function(vars) rowSums(census[vars])) - if (age == F & sex == T) { - - ## Calculate Pr(Geolocation, Sex | Race) - eth.cen <- c("whi", "bla", "his", "asi", "oth") - eth.let <- c("I", "B", "H", "D", "F") - - for (i in 1:length(eth.cen)) { - if (i != 4 & i != 5) { - census[paste("r_mal", eth.cen[i], sep = "_")] <- census[paste("P012", eth.let[i], "002", sep = "")] / sum(census[paste("P012", eth.let[i], "001", sep = "")]) - census[paste("r_fem", eth.cen[i], sep = "_")] <- census[paste("P012", eth.let[i], "026", sep = "")] / sum(census[paste("P012", eth.let[i], "001", sep = "")]) - } - if (i == 4) { - ## Combine Asian and Native Hawaiian/Pacific Islander - census[paste("r_mal", eth.cen[i], sep = "_")] <- (census$P012D002 + census$P012E002) / sum(census$P012D001 + census$P012E001) - census[paste("r_fem", eth.cen[i], sep = "_")] <- (census$P012D026 + census$P012E026) / sum(census$P012D001 + census$P012E001) + census <- dplyr::bind_cols(census, r_columns) + census <- dplyr::group_by(census, dplyr::across(dplyr::any_of("state"))) + census <- dplyr::mutate( + census, + dplyr::across( + # Divide all r_columns by the total population of the corresponding race + dplyr::all_of(names(r_columns)), + function(x) { + x / sum( + dplyr::pick( + sub("^.+_(.{3})$", "r_\\1", dplyr::cur_column(), perl = TRUE) + ) + ) } - if (i == 5) { - ## Combine American India/Alaska Native and Other - census[paste("r_mal", eth.cen[i], sep = "_")] <- (census$P012C002 + census$P012F002) / sum(census$P012C001 + census$P012F001) - census[paste("r_fem", eth.cen[i], sep = "_")] <- (census$P012C026 + census$P012F026) / sum(census$P012C001 + census$P012F001) - } - } - } - - if (age == T & sex == F) { - - ## Calculate Pr(Geolocation, Age Category | Race) - eth.cen <- c("whi", "bla", "his", "asi", "oth") - eth.let <- c("I", "B", "H", "D", "F") - age.cat <- c(seq(1, 23), seq(1, 23)) - age.cen <- as.character(c(c("03", "04", "05", "06", "07", "08", "09"), seq(10, 25), seq(27, 49))) - - for (i in 1:length(eth.cen)) { - for (j in 1:23) { - if (i != 4 & i != 5) { - census[paste("r", age.cat[j], eth.cen[i], sep = "_")] <- (census[paste("P012", eth.let[i], "0", age.cen[j], sep = "")] + census[paste("P012", eth.let[i], "0", age.cen[j + 23], sep = "")]) / sum(census[paste("P012", eth.let[i], "001", sep = "")]) - } - if (i == 4) { - ## Combine Asian and Native Hawaiian/Pacific Islander - census[paste("r", age.cat[j], eth.cen[i], sep = "_")] <- (census[paste("P012D0", age.cen[j], sep = "")] + census[paste("P012D0", age.cen[j + 23], sep = "")] + census[paste("P012E0", age.cen[j], sep = "")] + census[paste("P012E0", age.cen[j + 23], sep = "")]) / sum(census$P012D001 + census$P012E001) - } - if (i == 5) { - ## Combine American India/Alaska Native and Other - census[paste("r", age.cat[j], eth.cen[i], sep = "_")] <- (census[paste("P012C0", age.cen[j], sep = "")] + census[paste("P012C0", age.cen[j + 23], sep = "")] + census[paste("P012F0", age.cen[j], sep = "")] + census[paste("P012F0", age.cen[j + 23], sep = "")]) / sum(census$P012C001 + census$P012F001) - } - } - } - } + ) + ) + census <- dplyr::ungroup(census) - if (age == T & sex == T) { - - ## Calculate Pr(Geolocation, Sex, Age Category | Race) - eth.cen <- c("whi", "bla", "his", "asi", "oth") - eth.let <- c("I", "B", "H", "D", "F") - sex.let <- c("mal", "fem") - age.cat <- c(seq(1, 23), seq(1, 23)) - age.cen <- as.character(c(c("03", "04", "05", "06", "07", "08", "09"), seq(10, 25), seq(27, 49))) - - for (i in 1:length(eth.cen)) { - for (k in 1:length(sex.let)) { - for (j in 1:23) { - if (k == 2) { - j <- j + 23 - } - if (i != 4 & i != 5) { - census[paste("r", sex.let[k], age.cat[j], eth.cen[i], sep = "_")] <- census[paste("P012", eth.let[i], "0", age.cen[j], sep = "")] / sum(census[paste("P012", eth.let[i], "001", sep = "")]) - } - if (i == 4) { - ## Combine Asian and Native Hawaiian/Pacific Islander - census[paste("r", sex.let[k], age.cat[j], eth.cen[i], sep = "_")] <- (census[paste("P012D0", age.cen[j], sep = "")] + census[paste("P012E0", age.cen[j], sep = "")]) / sum(census$P012D001 + census$P012E001) - } - if (i == 5) { - ## Combine American India/Alaska Native and Other - census[paste("r", sex.let[k], age.cat[j], eth.cen[i], sep = "_")] <- (census[paste("P012C0", age.cen[j], sep = "")] + census[paste("P012F0", age.cen[j], sep = "")]) / sum(census$P012C001 + census$P012F001) - } - } - } - } - } - return(census) + census } From bc8fa3c2256579e81dfe92851977f59768293c3c Mon Sep 17 00:00:00 2001 From: Alexander Rossell Hayes Date: Thu, 30 Nov 2023 19:56:10 -0800 Subject: [PATCH 20/33] refactor(census_geo_api): don't create geographic variables that are never used --- R/census_geo_api.R | 26 +++----------------------- 1 file changed, 3 insertions(+), 23 deletions(-) diff --git a/R/census_geo_api.R b/R/census_geo_api.R index d4055e4..89140ee 100644 --- a/R/census_geo_api.R +++ b/R/census_geo_api.R @@ -89,14 +89,11 @@ census_geo_api <- function( census_data_url <- census_geo_api_url(year = year) if (geo == "place") { - geo.merge <- c("state", "place") region <- paste("for=place:*&in=state:", state.fips, sep = "") census <- get_census_api(census_data_url, key = key, var.names = unlist(vars), region = region, retry) } if (geo == "county") { - geo.merge <- c("state", "county") - if (is.null(counties)) { region <- paste("for=county:*&in=state:", state.fips, sep = "") } else { @@ -108,9 +105,6 @@ census_geo_api <- function( } if (geo == "tract") { - - geo.merge <- c("state", "county", "tract") - if (is.null(counties)) { region_county <- paste("for=county:*&in=state:", state.fips, sep = "") } else { @@ -127,22 +121,17 @@ census_geo_api <- function( } if(length(county_list) > 0) { - census_tracts <- furrr::future_map_dfr(seq_along(county_list), function(county) { + census <- furrr::future_map_dfr(seq_along(county_list), function(county) { message(paste("County ", county, " of ", length(county_list), ": ", county_list[county], sep = "")) region_county <- paste("for=tract:*&in=state:", state.fips, "+county:", county_list[county], sep = "") get_census_api(data_url = census_data_url, key = key, var.names = unlist(vars), region = region_county, retry) }) - - census <- rbind(census, census_tracts) - rm(census_tracts) } else { message('There were no intersecting counties in your voter.file data (tract)') } } if (geo == "block_group") { - geo.merge <- c("state", "county", "tract", "block_group") - if (is.null(counties)) { region_county <- paste("for=county:*&in=state:", state.fips, sep = "") } else { @@ -161,7 +150,7 @@ census_geo_api <- function( if(length(county_list) > 0) { message('Running block_group by county...') - census_blockgroup <- purrr::map_dfr( + census <- purrr::map_dfr( 1:length(county_list), function(county) { # too verbose, commenting out @@ -176,18 +165,12 @@ census_geo_api <- function( } ) message("\n") # new line for progress bar - - census <- rbind(census, census_blockgroup) - rm(census_blockgroup) } else { message('There were no intersecting counties in your voter.file data (block)') } } if (geo == "block") { - - geo.merge <- c("state", "county", "tract", "block") - if (is.null(counties)) { region_county <- paste("for=county:*&in=state:", state.fips, sep = "") } else { @@ -206,7 +189,7 @@ census_geo_api <- function( if(length(county_list) > 0) { message('Running block by county...') - census_blocks <- purrr::map_dfr( + census <- purrr::map_dfr( 1:length(county_list), function(county) { # too verbose, commenting out @@ -226,9 +209,6 @@ census_geo_api <- function( } ) message("\n") # new line for progress bar - - census <- rbind(census, census_blocks) - rm(census_blocks) } else { message('There were no intersecting counties in your voter.file data (block)') } From f138b22e49dcf287c1918e45b57995b3943fb3bc Mon Sep 17 00:00:00 2001 From: Alexander Rossell Hayes Date: Thu, 30 Nov 2023 20:08:05 -0800 Subject: [PATCH 21/33] refactor(census_geo_api): move main logic from `census_geo_api_zcta()` into `census_geo_api()`; turn `census_geo_api_zcta()` into a helper function --- R/census_geo_api.R | 24 +++++++-------- R/census_geo_api_zcta.R | 62 +++++++------------------------------- man/census_geo_api_zcta.Rd | 61 ------------------------------------- 3 files changed, 22 insertions(+), 125 deletions(-) delete mode 100644 man/census_geo_api_zcta.Rd diff --git a/R/census_geo_api.R b/R/census_geo_api.R index 89140ee..2c505e7 100644 --- a/R/census_geo_api.R +++ b/R/census_geo_api.R @@ -63,19 +63,6 @@ census_geo_api <- function( geo <- tolower(geo) geo <- rlang::arg_match(geo) - if (geo == "zcta") { - return( - census_geo_api_zcta( - state = state, - age = age, - sex = sex, - year = year, - retry = retry, - key = key - ) - ) - } - year <- as.character(year) year <- rlang::arg_match(year) @@ -214,6 +201,17 @@ census_geo_api <- function( } } + if (geo == "zcta") { + census <- census_geo_api_zcta( + census_data_url = census_data_url, + key = key, + vars = vars, + state = state, + counties = counties, + retry = retry + ) + } + census <- dplyr::mutate(census, state = as_state_abbreviation(state)) r_columns <- purrr::map(vars, function(vars) rowSums(census[vars])) diff --git a/R/census_geo_api_zcta.R b/R/census_geo_api_zcta.R index 93fb0b0..a82dce1 100644 --- a/R/census_geo_api_zcta.R +++ b/R/census_geo_api_zcta.R @@ -1,36 +1,17 @@ -#' Census download function for state-ZCTA-level data -#' -#' @inheritParams census_geo_api -#' @param ... These dots are for future extensions and must be empty. -#' -#' @return A [data.frame] with columns -#' `state`, `zcta`, `r_whi`, `r_bla`, `r_his`, `r_asi`, and `r_oth`. -#' -#' @examplesIf nzchar(Sys.getenv("CENSUS_API_KEY")) -#' \dontrun{census_geo_api_zcta(state = c("DE", "NJ")} -#' \dontrun{census_geo_api_zcta(state = "FL", age = TRUE, sex = TRUE)} -#' \dontrun{census_geo_api_zcta(state = "MA", age = FALSE, sex = FALSE, year = "2020")} -#' -#' @keywords internal census_geo_api_zcta <- function( + census_data_url, + key, + vars, state, - ..., - age = FALSE, - sex = FALSE, - year = c("2020", "2010", "2000"), - retry = 3, - key = Sys.getenv("CENSUS_API_KEY") + counties, + retry ) { - # Validate arguments - rlang::check_dots_empty() - validate_key(key) - assert_boolean(age) - assert_boolean(sex) - year <- as.character(year) - year <- rlang::arg_match(year) - - census_data_url <- census_geo_api_url(year = year) - vars <- census_geo_api_names(year = year, age = age, sex = sex) + if (!is.null(counties)) { + cli::cli_abort( + '{.arg counties} must be {.code NULL} when {.code geo = "zcta"}, + because ZCTA-level census data split by county is not available.' + ) + } region <- paste0( "for=zip%20code%20tabulation%20area%20(or%20part):*&in=state:", @@ -45,28 +26,7 @@ census_geo_api_zcta <- function( retry ) - census <- dplyr::mutate(census, state = as_state_abbreviation(state)) names(census)[[2]] <- "zcta" - r_columns <- purrr::map(vars, function(vars) rowSums(census[vars])) - - census <- dplyr::bind_cols(census, r_columns) - census <- dplyr::group_by(census, dplyr::across(dplyr::any_of("state"))) - census <- dplyr::mutate( - census, - dplyr::across( - # Divide all r_columns by the total population of the corresponding race - dplyr::all_of(names(r_columns)), - function(x) { - x / sum( - dplyr::pick( - sub("^.+_(.{3})$", "r_\\1", dplyr::cur_column(), perl = TRUE) - ) - ) - } - ) - ) - census <- dplyr::ungroup(census) - census } diff --git a/man/census_geo_api_zcta.Rd b/man/census_geo_api_zcta.Rd deleted file mode 100644 index ddcf6bd..0000000 --- a/man/census_geo_api_zcta.Rd +++ /dev/null @@ -1,61 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/census_geo_api_zcta.R -\name{census_geo_api_zcta} -\alias{census_geo_api_zcta} -\title{Census download function for state-ZCTA-level data} -\usage{ -census_geo_api_zcta( - state, - ..., - age = FALSE, - sex = FALSE, - year = c("2020", "2010", "2000"), - retry = 3, - key = Sys.getenv("CENSUS_API_KEY") -) -} -\arguments{ -\item{state}{A required character object specifying which state to extract Census data for, -e.g., \code{"NJ"}.} - -\item{...}{These dots are for future extensions and must be empty.} - -\item{age}{A \code{TRUE}/\code{FALSE} object indicating whether to condition on -age or not. If \code{FALSE} (default), function will return Pr(Geolocation | Race). -If \code{TRUE}, function will return Pr(Geolocation, Age | Race). -If \code{\var{sex}} is also \code{TRUE}, function will return Pr(Geolocation, Age, Sex | Race).} - -\item{sex}{A \code{TRUE}/\code{FALSE} object indicating whether to condition on -sex or not. If \code{FALSE} (default), function will return Pr(Geolocation | Race). -If \code{TRUE}, function will return Pr(Geolocation, Sex | Race). -If \code{\var{age}} is also \code{TRUE}, function will return Pr(Geolocation, Age, Sex | Race).} - -\item{year}{A character object specifying the year of U.S. Census data to be downloaded. -Use \code{"2010"}, or \code{"2020"}. Default is \code{"2020"}. -Warning: 2020 U.S. Census data is downloaded only when \code{\var{age}} and -\code{\var{sex}} are both \code{FALSE}.} - -\item{retry}{The number of retries at the census website if network interruption occurs.} - -\item{key}{A character string containing a valid Census API key, -which can be requested from the -\href{https://api.census.gov/data/key_signup.html}{U.S. Census API key signup page}. - -By default, attempts to find a census key stored in an -\link[=Sys.getenv]{environment variable} named \code{CENSUS_API_KEY}.} -} -\value{ -A \link{data.frame} with columns -\code{state}, \code{zcta}, \code{r_whi}, \code{r_bla}, \code{r_his}, \code{r_asi}, and \code{r_oth}. -} -\description{ -Census download function for state-ZCTA-level data -} -\examples{ -\dontshow{if (nzchar(Sys.getenv("CENSUS_API_KEY"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} -\dontrun{census_geo_api_zcta(state = c("DE", "NJ")} -\dontrun{census_geo_api_zcta(state = "FL", age = TRUE, sex = TRUE)} -\dontrun{census_geo_api_zcta(state = "MA", age = FALSE, sex = FALSE, year = "2020")} -\dontshow{\}) # examplesIf} -} -\keyword{internal} From 0121c3797012b7ee4a236fff19a3db5e2dd4071b Mon Sep 17 00:00:00 2001 From: Alexander Rossell Hayes Date: Thu, 30 Nov 2023 15:43:09 -0800 Subject: [PATCH 22/33] test(census_geo_api): add tests of `census_geo_api()` --- tests/testthat/_snaps/census_geo_api.md | 359 ++++++++++++++++++++++++ tests/testthat/test-census_geo_api.R | 20 ++ 2 files changed, 379 insertions(+) create mode 100644 tests/testthat/_snaps/census_geo_api.md create mode 100644 tests/testthat/test-census_geo_api.R diff --git a/tests/testthat/_snaps/census_geo_api.md b/tests/testthat/_snaps/census_geo_api.md new file mode 100644 index 0000000..faf5502 --- /dev/null +++ b/tests/testthat/_snaps/census_geo_api.md @@ -0,0 +1,359 @@ +# snapshot + + structure(list(state = c("DE", "DE", "DE"), county = c("003", + "005", "001"), P12I_001N = c(303265, 171741, 104845), P12B_001N = c(146544, + 25357, 46998), P12H_001N = c(63516, 26793, 13981), P12D_001N = c(35200, + 3070, 4429), P12E_001N = c(175, 112, 125), P12C_001N = c(2156, + 1843, 1149), P12F_001N = c(28714, 14137, 5658), P12G_001N = c(43699, + 17012, 15807), r_whi = c(0.523005047848499, 0.296181260358264, + 0.180813691793237), r_bla = c(0.669459431061814, 0.115838811506677, + 0.21470175743151), r_his = c(0.609032505513472, 0.256908620193691, + 0.134058874292837), r_asi = c(0.820556238547007, 0.0738094685811046, + 0.105634292871889), r_oth = c(0.572836566160937, 0.253443441521029, + 0.173719992318033)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, + -3L)) + +--- + + structure(list(state = c("DE", "DE", "DE"), county = c("001", + "005", "003"), P012I001 = c(105891, 149025, 331836), P012B001 = c(38913, + 25115, 127786), P012H001 = c(9346, 16954, 46921), P012D001 = c(3306, + 1943, 23300), P012E001 = c(91, 150, 159), P012C001 = c(1043, + 1564, 1574), P012F001 = c(3235, 8169, 19115), P012G001 = c(5723, + 4541, 13590), r_whi = c(0.180469772578534, 0.253982943390052, + 0.565547284031414), r_bla = c(0.202868403766148, 0.130934134109085, + 0.666197462124767), r_his = c(0.127640977315251, 0.23154559484301, + 0.64081342784174), r_asi = c(0.117344295139728, 0.0722995612974541, + 0.810356143562817), r_oth = c(0.170799603784541, 0.243774976944359, + 0.5854254192711)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, + -3L)) + +--- + + structure(list(state = c("DE", "DE", "DE"), county = c("001", + "005", "003"), P012I001 = c(105891, 149025, 331836), P012B001 = c(38913, + 25115, 127786), P012H001 = c(9346, 16954, 46921), P012D001 = c(3306, + 1943, 23300), P012E001 = c(91, 150, 159), P012C001 = c(1043, + 1564, 1574), P012F001 = c(3235, 8169, 19115), P012G001 = c(5723, + 4541, 13590), P012I002 = c(51574, 72032, 160694), P012B002 = c(18194, + 12160, 60237), P012H002 = c(4579, 9154, 24193), P012D002 = c(1355, + 889, 11467), P012E002 = c(44, 83, 71), P012C002 = c(509, 828, + 767), P012F002 = c(1660, 4492, 10111), P012G002 = c(2705, 2207, + 6433), P012I026 = c(54317, 76993, 171142), P012B026 = c(20719, + 12955, 67549), P012H026 = c(4767, 7800, 22728), P012D026 = c(1951, + 1054, 11833), P012E026 = c(47, 67, 88), P012C026 = c(534, 736, + 807), P012F026 = c(1575, 3677, 9004), P012G026 = c(3018, 2334, + 7157), r_whi = c(0.180469772578534, 0.253982943390052, 0.565547284031414 + ), r_bla = c(0.202868403766148, 0.130934134109085, 0.666197462124767 + ), r_his = c(0.127640977315251, 0.23154559484301, 0.64081342784174 + ), r_asi = c(0.117344295139728, 0.0722995612974541, 0.810356143562817 + ), r_oth = c(0.170799603784541, 0.243774976944359, 0.5854254192711 + ), r_mal_whi = c(0.0878974421902269, 0.122763961605585, 0.273870391579407 + ), r_mal_bla = c(0.0948523048369775, 0.0633947469944842, 0.314038599893647 + ), r_mal_his = c(0.0625367039510523, 0.125018778765655, 0.330410674533262 + ), r_mal_asi = c(0.0483263670593112, 0.0335762893364192, 0.398562990086013 + ), r_mal_oth = c(0.0832394029442907, 0.128548006967927, 0.295641629948424 + ), r_fem_whi = c(0.0925723303883072, 0.131218981784468, 0.291676892452007 + ), r_fem_bla = c(0.108016098929171, 0.0675393871146006, 0.35215886223112 + ), r_fem_his = c(0.0651042733641988, 0.106526816077355, 0.310402753308477 + ), r_fem_asi = c(0.0690179280804173, 0.0387232719610349, 0.411793153476804 + ), r_fem_oth = c(0.08756020084025, 0.115226969976432, 0.289783789322677 + )), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, + -3L)) + +--- + + structure(list(state = c("DE", "DE", "DE"), county = c("001", + "005", "003"), P012I001 = c(105891, 149025, 331836), P012B001 = c(38913, + 25115, 127786), P012H001 = c(9346, 16954, 46921), P012D001 = c(3306, + 1943, 23300), P012E001 = c(91, 150, 159), P012C001 = c(1043, + 1564, 1574), P012F001 = c(3235, 8169, 19115), P012G001 = c(5723, + 4541, 13590), P012I003 = c(3159, 3151, 7793), P012I027 = c(2913, + 3035, 7497), P012B003 = c(1488, 1030, 4669), P012B027 = c(1422, + 991, 4637), P012H003 = c(600, 1253, 2735), P012H027 = c(616, + 1198, 2630), P012D003 = c(86, 51, 917), P012E003 = c(1, 11, 6 + ), P012D027 = c(104, 54, 872), P012E027 = c(3, 4, 4), P012C003 = c(26, + 78, 64), P012F003 = c(178, 587, 1029), P012G003 = c(538, 443, + 1203), P012C027 = c(22, 73, 62), P012F027 = c(195, 570, 1008), + P012G027 = c(541, 493, 1252), P012I004 = c(3265, 3384, 8494 + ), P012I028 = c(3030, 3125, 8084), P012B004 = c(1484, 952, + 4917), P012B028 = c(1548, 966, 4743), P012H004 = c(525, 986, + 2529), P012H028 = c(504, 911, 2426), P012D004 = c(111, 74, + 796), P012E004 = c(6, 10, 5), P012D028 = c(118, 74, 863), + P012E028 = c(3, 6, 10), P012C004 = c(24, 68, 60), P012F004 = c(163, + 475, 1012), P012G004 = c(458, 355, 1030), P012C028 = c(25, + 48, 64), P012F028 = c(156, 450, 947), P012G028 = c(488, 374, + 1031), P012I005 = c(3292, 3462, 9316), P012I029 = c(3044, + 3367, 8870), P012B005 = c(1584, 989, 5262), P012B029 = c(1522, + 920, 5015), P012H005 = c(381, 666, 2147), P012H029 = c(440, + 695, 2124), P012D005 = c(106, 54, 672), P012E005 = c(5, 1, + 10), P012D029 = c(93, 85, 645), P012E029 = c(3, 3, 9), P012C005 = c(34, + 42, 56), P012F005 = c(131, 316, 845), P012G005 = c(393, 283, + 769), P012C029 = c(31, 38, 70), P012F029 = c(124, 331, 850 + ), P012G029 = c(413, 283, 853), P012I006 = c(2040, 2352, + 6242), P012I030 = c(1942, 2303, 5956), P012B006 = c(965, + 652, 3416), P012B030 = c(974, 653, 3273), P012H006 = c(271, + 411, 1290), P012H030 = c(243, 332, 1172), P012D006 = c(59, + 41, 399), P012E006 = c(2, 1, 2), P012D030 = c(52, 49, 362 + ), P012E030 = c(3, 3, 8), P012C006 = c(28, 34, 50), P012F006 = c(79, + 185, 544), P012G006 = c(185, 154, 453), P012C030 = c(20, + 30, 49), P012F030 = c(86, 163, 495), P012G030 = c(183, 161, + 404), P012I007 = c(1389, 1421, 5291), P012I031 = c(1311, + 1311, 5727), P012B007 = c(1018, 447, 2272), P012B031 = c(1174, + 382, 2093), P012H007 = c(174, 276, 945), P012H031 = c(207, + 194, 944), P012D007 = c(49, 18, 362), P012E007 = c(0, 1, + 4), P012D031 = c(29, 26, 354), P012E031 = c(0, 4, 0), P012C007 = c(14, + 20, 28), P012F007 = c(71, 147, 362), P012G007 = c(120, 81, + 279), P012C031 = c(16, 17, 26), P012F031 = c(66, 92, 344), + P012G031 = c(150, 79, 289), P012I008 = c(678, 708, 2730), + P012I032 = c(643, 641, 3067), P012B008 = c(463, 194, 1040 + ), P012B032 = c(575, 170, 1003), P012H008 = c(101, 163, 508 + ), P012H032 = c(77, 99, 457), P012D008 = c(13, 12, 217), + P012E008 = c(3, 1, 2), P012D032 = c(24, 9, 173), P012E032 = c(3, + 1, 2), P012C008 = c(8, 13, 14), P012F008 = c(37, 88, 215), + P012G008 = c(54, 19, 126), P012C032 = c(4, 10, 13), P012F032 = c(25, + 40, 181), P012G032 = c(42, 23, 140), P012I009 = c(738, 577, + 2685), P012I033 = c(735, 589, 2939), P012B009 = c(419, 170, + 934), P012B033 = c(463, 170, 906), P012H009 = c(91, 185, + 485), P012H033 = c(97, 124, 429), P012D009 = c(15, 11, 139 + ), P012E009 = c(1, 2, 3), P012D033 = c(16, 6, 185), P012E033 = c(1, + 1, 3), P012C009 = c(5, 9, 10), P012F009 = c(36, 102, 188), + P012G009 = c(41, 34, 102), P012C033 = c(13, 13, 16), P012F033 = c(35, + 64, 172), P012G033 = c(59, 30, 131), P012I010 = c(2122, 1951, + 6605), P012I034 = c(2010, 1970, 6403), P012B010 = c(945, + 500, 2657), P012B034 = c(955, 559, 2756), P012H010 = c(302, + 623, 1397), P012H034 = c(260, 482, 1156), P012D010 = c(74, + 35, 482), P012E010 = c(4, 5, 5), P012D034 = c(67, 33, 510 + ), P012E034 = c(3, 2, 6), P012C010 = c(25, 38, 27), P012F010 = c(114, + 329, 627), P012G010 = c(115, 72, 236), P012C034 = c(19, 34, + 27), P012F034 = c(99, 252, 466), P012G034 = c(136, 79, 311 + ), P012I011 = c(3277, 3402, 10187), P012I035 = c(3380, 3429, + 10014), P012B011 = c(1158, 837, 4120), P012B035 = c(1340, + 842, 4478), P012H011 = c(431, 1169, 2415), P012H035 = c(426, + 835, 2092), P012D011 = c(87, 49, 1151), P012E011 = c(5, 15, + 6), P012D035 = c(125, 64, 1355), P012E035 = c(2, 11, 6), + P012C011 = c(28, 64, 50), P012F011 = c(186, 638, 1181), P012G011 = c(167, + 129, 370), P012C035 = c(34, 57, 55), P012F035 = c(146, 413, + 910), P012G035 = c(187, 147, 465), P012I012 = c(2963, 3214, + 9253), P012I036 = c(3226, 3168, 9400), P012B012 = c(922, + 699, 4038), P012B036 = c(1228, 767, 4581), P012H012 = c(366, + 930, 2184), P012H036 = c(345, 777, 2003), P012D012 = c(91, + 71, 1283), P012E012 = c(1, 4, 8), P012D036 = c(141, 68, 1221 + ), P012E036 = c(5, 5, 4), P012C012 = c(37, 59, 49), P012F012 = c(161, + 461, 1012), P012G012 = c(121, 119, 386), P012C036 = c(28, + 48, 61), P012F036 = c(147, 385, 892), P012G036 = c(150, 113, + 420), P012I013 = c(3048, 3523, 9894), P012I037 = c(3327, + 3718, 9876), P012B013 = c(1045, 707, 4174), P012B037 = c(1282, + 712, 4821), P012H013 = c(297, 767, 1866), P012H037 = c(332, + 657, 1784), P012D013 = c(123, 70, 1173), P012E013 = c(2, + 8, 3), P012D037 = c(163, 93, 1091), P012E037 = c(2, 9, 10 + ), P012C013 = c(32, 48, 66), P012F013 = c(129, 431, 837), + P012G013 = c(110, 82, 320), P012C037 = c(37, 64, 69), P012F037 = c(127, + 337, 764), P012G037 = c(132, 90, 380), P012I014 = c(3603, + 4282, 11235), P012I038 = c(3657, 4435, 11648), P012B014 = c(1142, + 842, 4436), P012B038 = c(1347, 937, 5499), P012H014 = c(240, + 565, 1637), P012H038 = c(265, 448, 1581), P012D014 = c(126, + 83, 864), P012E014 = c(3, 4, 4), P012D038 = c(171, 86, 896 + ), P012E038 = c(2, 2, 5), P012C014 = c(28, 50, 52), P012F014 = c(83, + 288, 722), P012G014 = c(85, 90, 297), P012C038 = c(48, 46, + 53), P012F038 = c(95, 202, 658), P012G038 = c(119, 72, 350 + ), P012I015 = c(4108, 5377, 13313), P012I039 = c(4280, 5636, + 13731), P012B015 = c(1261, 918, 4578), P012B039 = c(1413, + 973, 5489), P012H015 = c(229, 353, 1290), P012H039 = c(245, + 311, 1190), P012D015 = c(106, 68, 752), P012E015 = c(1, 4, + 6), P012D039 = c(178, 100, 806), P012E039 = c(4, 3, 8), P012C015 = c(42, + 76, 64), P012F015 = c(105, 161, 548), P012G015 = c(70, 92, + 252), P012C039 = c(40, 48, 62), P012F039 = c(94, 137, 470 + ), P012G039 = c(105, 101, 313), P012I016 = c(3831, 5618, + 13725), P012I040 = c(4119, 6144, 14291), P012B016 = c(1073, + 875, 4074), P012B040 = c(1320, 982, 4946), P012H016 = c(172, + 259, 957), P012H040 = c(193, 212, 816), P012D016 = c(81, + 65, 622), P012E016 = c(6, 4, 2), P012D040 = c(137, 80, 646 + ), P012E040 = c(9, 4, 2), P012C016 = c(47, 61, 57), P012F016 = c(64, + 98, 379), P012G016 = c(79, 68, 180), P012C040 = c(46, 36, + 60), P012F040 = c(65, 85, 285), P012G040 = c(90, 64, 252), + P012I017 = c(3198, 5713, 11775), P012I041 = c(3583, 6570, + 12620), P012B017 = c(913, 703, 3013), P012B041 = c(1161, + 801, 3955), P012H017 = c(129, 180, 679), P012H041 = c(113, + 161, 626), P012D017 = c(51, 44, 490), P012E017 = c(1, 3, + 3), P012D041 = c(133, 60, 552), P012E041 = c(2, 1, 3), P012C017 = c(37, + 49, 50), P012F017 = c(45, 78, 247), P012G017 = c(45, 40, + 135), P012C041 = c(45, 41, 39), P012F041 = c(27, 66, 201), + P012G041 = c(56, 52, 200), P012I018 = c(1273, 2551, 4128), + P012I042 = c(1382, 2830, 4509), P012B018 = c(320, 227, 1110 + ), P012B042 = c(422, 275, 1377), P012H018 = c(42, 59, 202 + ), P012H042 = c(61, 52, 200), P012D018 = c(28, 20, 165), + P012E018 = c(0, 1, 0), P012D042 = c(49, 27, 182), P012E042 = c(0, + 3, 2), P012C018 = c(13, 11, 7), P012F018 = c(11, 23, 65), + P012G018 = c(18, 20, 46), P012C042 = c(18, 14, 9), P012F042 = c(13, + 22, 57), P012G042 = c(21, 21, 55), P012I019 = c(1925, 4078, + 5826), P012I043 = c(2108, 4596, 6378), P012B019 = c(450, + 319, 1340), P012B043 = c(550, 350, 1776), P012H019 = c(51, + 71, 233), P012H043 = c(79, 68, 272), P012D019 = c(29, 29, + 217), P012E019 = c(0, 1, 2), P012D043 = c(75, 29, 289), P012E043 = c(0, + 2, 2), P012C019 = c(19, 24, 15), P012F019 = c(21, 27, 76), + P012G019 = c(30, 22, 63), P012C043 = c(20, 21, 15), P012F043 = c(15, + 18, 83), P012G043 = c(28, 34, 79), P012I020 = c(1123, 2492, + 3106), P012I044 = c(1247, 2728, 3446), P012B020 = c(230, + 160, 721), P012B044 = c(296, 208, 933), P012H020 = c(17, + 42, 111), P012H044 = c(31, 33, 140), P012D020 = c(24, 7, + 116), P012E020 = c(0, 2, 0), P012D044 = c(50, 28, 144), P012E044 = c(0, + 0, 0), P012C020 = c(9, 13, 7), P012F020 = c(2, 14, 39), P012G020 = c(21, + 16, 37), P012C044 = c(6, 4, 7), P012F044 = c(7, 6, 46), P012G044 = c(19, + 13, 26), P012I021 = c(1507, 3532, 4076), P012I045 = c(1706, + 3896, 4876), P012B021 = c(330, 256, 923), P012B045 = c(397, + 270, 1213), P012H021 = c(38, 50, 150), P012H045 = c(55, 49, + 169), P012D021 = c(25, 26, 200), P012E021 = c(1, 0, 0), P012D045 = c(65, + 20, 180), P012E045 = c(0, 1, 0), P012C021 = c(13, 21, 8), + P012F021 = c(7, 12, 47), P012G021 = c(13, 20, 40), P012C045 = c(10, + 17, 14), P012F045 = c(11, 12, 47), P012G045 = c(36, 27, 52 + ), P012I022 = c(1923, 4552, 5078), P012I046 = c(2285, 4883, + 6167), P012B022 = c(448, 262, 1128), P012B046 = c(497, 331, + 1537), P012H022 = c(54, 71, 184), P012H046 = c(65, 66, 204 + ), P012D022 = c(40, 34, 234), P012E022 = c(0, 1, 0), P012D046 = c(61, + 28, 245), P012E046 = c(0, 0, 0), P012C022 = c(16, 27, 15), + P012F022 = c(20, 17, 68), P012G022 = c(24, 35, 43), P012C046 = c(20, + 31, 12), P012F046 = c(15, 10, 59), P012G046 = c(26, 27, 42 + ), P012I023 = c(1563, 3347, 4147), P012I047 = c(1778, 3561, + 5592), P012B023 = c(286, 186, 724), P012B047 = c(397, 280, + 1074), P012H023 = c(27, 39, 114), P012H047 = c(44, 40, 142 + ), P012D023 = c(14, 17, 122), P012E023 = c(0, 1, 0), P012D047 = c(57, + 21, 129), P012E047 = c(1, 1, 3), P012C023 = c(13, 11, 10), + P012F023 = c(5, 9, 28), P012G023 = c(6, 17, 22), P012C047 = c(12, + 23, 9), P012F047 = c(9, 12, 33), P012G047 = c(24, 23, 45), + P012I024 = c(929, 2123, 3204), P012I048 = c(1259, 2540, 4733 + ), P012B024 = c(141, 120, 395), P012B048 = c(200, 211, 747 + ), P012H024 = c(22, 22, 84), P012H048 = c(39, 29, 88), P012D024 = c(11, + 8, 53), P012E024 = c(1, 3, 0), P012D048 = c(26, 10, 72), + P012E048 = c(1, 0, 0), P012C024 = c(8, 8, 5), P012F024 = c(7, + 4, 26), P012G024 = c(6, 9, 28), P012C048 = c(15, 9, 5), P012F048 = c(12, + 7, 19), P012G048 = c(9, 12, 26), P012I025 = c(620, 1222, + 2591), P012I049 = c(1352, 2518, 5318), P012B025 = c(109, + 115, 296), P012B049 = c(236, 205, 697), P012H025 = c(19, + 14, 51), P012H049 = c(30, 27, 83), P012D025 = c(6, 2, 41), + P012E025 = c(1, 0, 0), P012D049 = c(17, 4, 61), P012E049 = c(0, + 1, 1), P012C025 = c(3, 4, 3), P012F025 = c(5, 2, 14), P012G025 = c(6, + 7, 16), P012C049 = c(5, 14, 10), P012F049 = c(6, 3, 17), + P012G049 = c(4, 16, 41), r_whi = c(0.180469772578534, 0.253982943390052, + 0.565547284031414), r_bla = c(0.202868403766148, 0.130934134109085, + 0.666197462124767), r_his = c(0.127640977315251, 0.23154559484301, + 0.64081342784174), r_asi = c(0.117344295139728, 0.0722995612974541, + 0.810356143562817), r_oth = c(0.170799603784541, 0.243774976944359, + 0.5854254192711), r_1_whi = c(0.0103484947643979, 0.0105427846858639, + 0.0260587096422339), r_1_bla = c(0.0151709468547656, 0.0105362486575537, + 0.0485157496324564), r_1_his = c(0.0166072574807774, 0.0334740033596919, + 0.0732713292634627), r_1_asi = c(0.00670144046426474, 0.00414522090573077, + 0.0621437700784138), r_1_oth = c(0.0256173788297981, 0.038323598729378, + 0.0788673702906719), r_2_whi = c(0.0107285531195462, 0.0110932727966841, + 0.028253844895288), r_2_bla = c(0.01580697967823, 0.00999927012626815, + 0.050361287497263), r_2_his = c(0.0140533453517434, 0.0259078679613772, + 0.0676718427773453), r_2_asi = c(0.00822135479636602, 0.00566513523783205, + 0.0578258316349442), r_2_oth = c(0.0224408238549032, 0.0302285070191618, + 0.0707722785804556), r_3_whi = c(0.0107984293193717, 0.0116386480148342, + 0.0309943553664921), r_3_bla = c(0.0161927700793477, 0.00995234967207816, + 0.0535779453011772), r_3_his = c(0.0112126302563472, 0.0185875636770872, + 0.058330260444408), r_3_asi = c(0.00715050606238558, 0.00493972157932916, + 0.0461501260838025), r_3_oth = c(0.0192301123749018, 0.022082180551286, + 0.0588004235406633), r_4_whi = c(0.00678651287085515, 0.00793350512652705, + 0.0207890215968586), r_4_bla = c(0.0101087511860448, 0.0068034658575495, + 0.0348723242307652), r_4_his = c(0.00701984403381544, 0.0101473620955737, + 0.0336242334849292), r_4_asi = c(0.00400704687553974, 0.0032470897094891, + 0.0266330443193202), r_4_oth = c(0.00992246473340848, 0.0124158896061755, + 0.0340711138436315), r_5_whi = c(0.00460160340314136, 0.00465614092495637, + 0.0187779504799302), r_5_bla = c(0.0114277372871636, 0.00432189516927857, + 0.0227564202821483), r_5_his = c(0.00520342524685541, 0.00641892353286625, + 0.0257986096884773), r_5_asi = c(0.002694393588725, 0.00169263186984006, + 0.0248713254343846), r_5_oth = c(0.00746319636574786, 0.00744611811319466, + 0.0226799193906479), r_6_whi = c(0.00225137707242583, 0.00229909740401396, + 0.00987981293630017), r_6_bla = c(0.00541149238324627, 0.00189767170279542, + 0.0106509431011292), r_6_his = c(0.00243099657202169, 0.00357820843747012, + 0.0131792791685445), r_6_asi = c(0.00148537082455353, 0.000794500673598397, + 0.013610141973816), r_6_oth = c(0.00290330293404379, 0.00329610274276736, + 0.0117669160091539), r_7_whi = c(0.00251043030104712, 0.00198721095113438, + 0.00958496945898778), r_7_bla = c(0.00459820451061966, 0.00177255049162209, + 0.00959262618995485), r_7_his = c(0.0025675694131465, 0.00422010079075675, + 0.012482757678808), r_7_asi = c(0.00113993574907596, 0.000690870150955128, + 0.0113993574907596), r_7_oth = c(0.00322778973255457, 0.00430371964340609, + 0.01057143833043), r_8_whi = c(0.007042157504363, 0.00668255071989529, + 0.022169502617801), r_8_bla = c(0.00990542921788816, 0.00552097344302293, + 0.0282200465033835), r_8_his = c(0.00767539367121454, 0.0150912989442919, + 0.034867046339165), r_8_asi = c(0.00511243911706795, 0.00259076306608173, + 0.0346471380703997), r_8_oth = c(0.00867575229702497, 0.0137309150527718, + 0.0289305598251187), r_9_whi = c(0.0113455088350785, 0.0116420566099476, + 0.034428514943281), r_9_bla = c(0.0130230327296235, 0.0087532713983338, + 0.0448246739028434), r_9_his = c(0.0117042924843966, 0.0273691973614127, + 0.0615533794949536), r_9_asi = c(0.00756502815295865, 0.00480154754913814, + 0.0869805520052506), r_9_oth = c(0.0127745329097927, 0.0247293096970318, + 0.0517641834887454), r_10_whi = c(0.010547897578534, 0.0108768270069808, + 0.0317902623254799), r_10_bla = c(0.0112087751676103, 0.00764282064917055, + 0.04493415496262), r_10_his = c(0.00971032900397427, 0.0233129839800057, + 0.0571830485789596), r_10_asi = c(0.00822135479636602, 0.00511243911706795, + 0.0869114649901551), r_10_oth = c(0.01099839464426, 0.0202377292755405, + 0.0481606722000205), r_11_whi = c(0.0108648969240838, 0.0123408186082024, + 0.0336939626963351), r_11_bla = c(0.0121315441000136, 0.00739779161062279, + 0.0468943872710021), r_11_his = c(0.0085904317067508, 0.0194479725761735, + 0.0498490870105571), r_11_asi = c(0.0100176171888494, 0.00621783135859615, + 0.0786555666862413), r_11_oth = c(0.0096833691976637, 0.0179663216859651, + 0.0416026232195922), r_12_whi = c(0.0123732002617801, 0.0148563618019197, + 0.0389994409904014), r_12_bla = c(0.0129761122754335, 0.00927460977822265, + 0.0517949680419573), r_12_his = c(0.0068969284768031, 0.0138348288059437, + 0.0439491402739651), r_12_asi = c(0.0104321392794224, 0.00604511382085737, + 0.0611074648519811), r_12_oth = c(0.00782183966936503, 0.0127745329097927, + 0.0364108344434197), r_13_whi = c(0.0142956479057592, 0.0187694289921466, + 0.0460910231239093), r_13_bla = c(0.0139405882782279, 0.00985850876369817, + 0.0524831347034106), r_13_his = c(0.00647355266931618, 0.00906843665068764, + 0.0338700645989539), r_13_asi = c(0.0099830736813016, 0.00604511382085737, + 0.0543023938650731), r_13_oth = c(0.00778768316425863, 0.0105031253202172, + 0.0291867336134167), r_14_whi = c(0.0135491655759162, 0.0200459478621291, + 0.0477476003490401), r_14_bla = c(0.0124756274307402, 0.00968125371453596, + 0.0470247218659743), r_14_his = c(0.00498490870105571, 0.00643258081697874, + 0.0242143647314295), r_14_asi = c(0.00804863725862724, 0.00528515665480673, + 0.0439393416007461), r_14_oth = c(0.00667759674830071, 0.00703624005191789, + 0.0207159203470301), r_15_whi = c(0.0115568417321117, 0.0209338868891798, + 0.0415763388961606), r_15_bla = c(0.0108125579988948, 0.00784092923352831, + 0.0363268583106551), r_15_his = c(0.0033050627552205, 0.00465713388235616, + 0.0178227557667882), r_15_asi = c(0.00645963591143045, 0.00373069881515769, + 0.0362015959100487), r_15_oth = c(0.00435495440106568, 0.0055675103323428, + 0.0148922362263893), r_16_whi = c(0.00452491001308901, 0.00917082515270506, + 0.0147200179973822), r_16_bla = c(0.00386833077877527, 0.00261711866704203, + 0.0129656855078357), r_16_his = c(0.00140670026358558, 0.00151595853648543, + 0.00549022821321752), r_16_asi = c(0.00265985008117724, 0.00176171888493558, + 0.012055684134167), r_16_oth = c(0.00160535574000068, 0.00189568603340506, + 0.0040817023602145), r_17_whi = c(0.00687343204624782, 0.0147830770069808, + 0.020799247382199), r_17_bla = c(0.00521338379888851, 0.00348775376145641, + 0.0162449039173366), r_17_his = c(0.00177544693462258, 0.00189836249163491, + 0.0068969284768031), r_17_asi = c(0.00359252478496667, 0.00210715396041314, + 0.0176171888493558), r_17_oth = c(0.00227140758957543, 0.00249342487276702, + 0.00565290159510879), r_18_whi = c(0.00403918520942408, 0.0088964332460733, + 0.011166557591623), r_18_bla = c(0.00274223987821535, 0.00191852523799097, + 0.00862293680336159), r_18_his = c(0.000655549637399107, + 0.0010242963084361, 0.00342797831223283), r_18_asi = c(0.00255621955853397, + 0.00127810977926699, 0.00898131196241666), r_18_oth = c(0.00109300816340472, + 0.00112716466851112, 0.0027666769136182), r_19_whi = c(0.00547590804973822, + 0.0126595222513089, 0.0152568717277487), r_19_bla = c(0.00379013002179194, + 0.00274223987821535, 0.0111357877944258), r_19_his = c(0.00127012742246077, + 0.00135207112713566, 0.00435667363188156), r_19_asi = c(0.00314345918684583, + 0.00162354485474455, 0.0131265328681474), r_19_oth = c(0.00153704272978789, + 0.00186152952829866, 0.00355227653106534), r_20_whi = c(0.00717168411867365, + 0.016080047447644, 0.0191648260253054), r_20_bla = c(0.00492664768994964, + 0.00309153659274088, 0.0138936678240379), r_20_his = c(0.00162521680938529, + 0.00187104792340995, 0.00529902623564278), r_20_asi = c(0.0034888942623234, + 0.00217624097550865, 0.0165463401153753), r_20_oth = c(0.00206646855893705, + 0.00251050312532022, 0.0040817023602145), r_21_whi = c(0.00569405813699825, + 0.011773287521815, 0.0165981539048866), r_21_bla = c(0.00356074113464085, + 0.00242943685028204, 0.00937366407040154), r_21_his = c(0.000969667171986179, + 0.00107892544488603, 0.00349626473279524), r_21_asi = c(0.00248713254343846, + 0.00138174030191026, 0.00877405091713013), r_21_oth = c(0.00117839942617071, + 0.00162243399255388, 0.00251050312532022), r_22_whi = c(0.00372900305410122, + 0.0079471395069808, 0.0135270097076789), r_22_bla = c(0.00177776387542098, + 0.0017256300374321, 0.00595368429833067), r_22_his = c(0.000833094330861365, + 0.000696521489736551, 0.0023490528673468), r_22_asi = c(0.0013471967943625, + 0.000725413658502884, 0.00431793844346955), r_22_oth = c(0.000973460395532329, + 0.000836834375106739, 0.00186152952829866), r_23_whi = c(0.00336087478184991, + 0.00637407286212914, 0.0134792893760908), r_23_bla = c(0.00179861741061653, + 0.00166828281564432, 0.00517689011229629), r_23_his = c(0.000669206921511588, + 0.000559948648611737, 0.00183007607107251), r_23_asi = c(0.000829044181146154, + 0.000241804552834295, 0.00355798127741891), r_23_oth = c(0.000495269324042764, + 0.000785599617447143, 0.00172490350787307)), class = c("tbl_df", + "tbl", "data.frame"), row.names = c(NA, -3L)) + diff --git a/tests/testthat/test-census_geo_api.R b/tests/testthat/test-census_geo_api.R new file mode 100644 index 0000000..4fd02c8 --- /dev/null +++ b/tests/testthat/test-census_geo_api.R @@ -0,0 +1,20 @@ +skip_if_not(nzchar(Sys.getenv("CENSUS_API_KEY"))) + +test_that("snapshot", { + expect_snapshot_value( + census_geo_api(state = "DE", geo = "county", year = "2020"), + style = "deparse" + ) + expect_snapshot_value( + census_geo_api(state = "DE", geo = "county", year = "2010"), + style = "deparse" + ) + expect_snapshot_value( + census_geo_api(state = "DE", geo = "county", year = "2010", sex = TRUE), + style = "deparse" + ) + expect_snapshot_value( + census_geo_api(state = "DE", geo = "county", year = "2010", age = TRUE), + style = "deparse" + ) +}) From 11c4372ed24fffd696bdd002236348aa29b76cf0 Mon Sep 17 00:00:00 2001 From: Alexander Rossell Hayes Date: Thu, 30 Nov 2023 21:28:55 -0800 Subject: [PATCH 23/33] fix: add `census_geo_api_names_legacy()` to support census data with column names from past wru versions --- R/census_geo_api_names.R | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/R/census_geo_api_names.R b/R/census_geo_api_names.R index dc2aaa0..3e9514a 100644 --- a/R/census_geo_api_names.R +++ b/R/census_geo_api_names.R @@ -107,6 +107,28 @@ census_geo_api_names <- function( vars } +census_geo_api_names_legacy <- function(year) { + if (year == 2020) { + return( + list( + r_whi = 'P2_005N', + r_bla = 'P2_006N', + r_his = 'P2_002N', + r_asi = c('P2_008N', 'P2_009N'), + r_oth = c('P2_007N', 'P2_010N', 'P2_011N') + ) + ) + } + + list( + r_whi = 'P005003', + r_bla = 'P005004', + r_his = 'P005010', + r_asi = c('P005006', 'P005007'), + r_oth = c('P005005', 'P005008', 'P005009') + ) +} + #' @rdname census_geo_api_names census_geo_api_url <- function(year = c("2020", "2010", "2000")) { year <- as.character(year) From 250c1cde775454cfd2567d21624a03b5f93beed1 Mon Sep 17 00:00:00 2001 From: Alexander Rossell Hayes Date: Thu, 30 Nov 2023 20:13:47 -0800 Subject: [PATCH 24/33] refactor(census_data_preflight): use `census_geo_api_names()` --- R/census_data_preflight.R | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) diff --git a/R/census_data_preflight.R b/R/census_data_preflight.R index 804c066..8ed74d5 100644 --- a/R/census_data_preflight.R +++ b/R/census_data_preflight.R @@ -4,26 +4,12 @@ #' @keywords internal census_data_preflight <- function(census.data, census.geo, year) { - - if (year != "2020"){ - vars_ <- c( - pop_white = 'P005003', pop_black = 'P005004', - pop_aian = 'P005005', pop_asian = 'P005006', - pop_nhpi = 'P005007', pop_other = 'P005008', - pop_two = 'P005009', pop_hisp = 'P005010' - ) - } else { - vars_ <- c( - pop_white = 'P2_005N', pop_black = 'P2_006N', - pop_aian = 'P2_007N', pop_asian = 'P2_008N', - pop_nhpi = 'P2_009N', pop_other = 'P2_010N', - pop_two = 'P2_011N', pop_hisp = 'P2_002N' - ) - } + vars_ <- unlist(census_geo_api_names(year = year)) + legacy_vars <- unlist(census_geo_api_names_legacy(year = year)) test <- lapply(census.data, function(x) { nms_to_test <- names(x[[census.geo]]) - all(vars_ %in% nms_to_test) + all(vars_ %in% nms_to_test) || all(legacy_vars %in% nms_to_test) }) missings <- names(test)[!unlist(test)] From dd4c6a67214e75589341f54732db13042e3696db Mon Sep 17 00:00:00 2001 From: Alexander Rossell Hayes Date: Tue, 28 Nov 2023 13:31:54 -0800 Subject: [PATCH 25/33] feat(census_helper_new): add support for ZCTAs --- R/census_helper_v2.R | 42 ++++++++++++++++------------------------ man/census_helper_new.Rd | 2 +- 2 files changed, 18 insertions(+), 26 deletions(-) diff --git a/R/census_helper_v2.R b/R/census_helper_v2.R index 36dd25b..309a538 100644 --- a/R/census_helper_v2.R +++ b/R/census_helper_v2.R @@ -59,7 +59,7 @@ census_helper_new <- function( key = Sys.getenv("CENSUS_API_KEY"), voter.file, states = "all", - geo = "tract", + geo = c("tract", "block", "block_group", "county", "place", "zcta"), age = FALSE, sex = FALSE, year = "2020", @@ -71,6 +71,9 @@ census_helper_new <- function( stop("Error: census_helper_new function does not currently support precinct-level data.") } + geo <- tolower(geo) + geo <- rlang::arg_match(geo) + if(!(year %in% c("2000","2010","2020"))){ stop("Interface only implemented for census years '2000', '2010', or '2020'.") } @@ -88,10 +91,10 @@ census_helper_new <- function( validate_key(key) } - states <- toupper(states) - if (states == "ALL") { + if (toupper(states) == "ALL") { states <- toupper(as.character(unique(voter.file$state))) } + states <- as_state_abbreviation(states) df.out <- NULL @@ -163,32 +166,21 @@ census_helper_new <- function( } census$state <- state - + ## Calculate Pr(Geolocation | Race) - if (year != "2020") { - vars_ <- c( - pop_white = 'P005003', pop_black = 'P005004', - pop_aian = 'P005005', pop_asian = 'P005006', - pop_nhpi = 'P005007', pop_other = 'P005008', - pop_two = 'P005009', pop_hisp = 'P005010' - ) - drop <- c(grep("state", names(census)), grep("P005", names(census))) + if (any(c("P2_005N", "P005003") %in% names(census))) { + vars_ <- census_geo_api_names_legacy(year = year) } else { - vars_ <- c( - pop_white = 'P2_005N', pop_black = 'P2_006N', - pop_aian = 'P2_007N', pop_asian = 'P2_008N', - pop_nhpi = 'P2_009N', pop_other = 'P2_010N', - pop_two = 'P2_011N', pop_hisp = 'P2_002N' - ) - drop <- c(grep("state", names(census)), grep("P2_", names(census))) + vars_ <- census_geo_api_names(year) } - geoPopulations <- rowSums(census[,names(census) %in% vars_]) + drop <- match(c("state", unlist(vars_)), names(census)) - census$r_whi <- (census[, vars_["pop_white"]]) / (geoPopulations ) #Pr(White | Geo) - census$r_bla <- (census[, vars_["pop_black"]]) / (geoPopulations) #Pr(Black | Geo) - census$r_his <- (census[, vars_["pop_hisp"]]) / (geoPopulations) #Pr(Latino | Geo) - census$r_asi <- (census[, vars_["pop_asian"]] + census[, vars_["pop_nhpi"]]) / (geoPopulations) #Pr(Asian or NH/PI | Geo) - census$r_oth <- (census[, vars_["pop_aian"]] + census[, vars_["pop_other"]] + census[, vars_["pop_two"]]) / (geoPopulations) #Pr(AI/AN, Other, or Mixed | Geo) + geoPopulations <- rowSums(census[,names(census) %in% vars_]) + + for (i in seq_along(vars_)) { + census[[names(vars_)[[i]]]] <- + rowSums(census[, vars_[[i]], drop = FALSE]) / geoPopulations + } # check locations with zero people # get average without places with zero people, and assign that to zero locs. diff --git a/man/census_helper_new.Rd b/man/census_helper_new.Rd index 7293d83..93290c4 100644 --- a/man/census_helper_new.Rd +++ b/man/census_helper_new.Rd @@ -8,7 +8,7 @@ census_helper_new( key = Sys.getenv("CENSUS_API_KEY"), voter.file, states = "all", - geo = "tract", + geo = c("tract", "block", "block_group", "county", "place", "zcta"), age = FALSE, sex = FALSE, year = "2020", From 4e691b6fb2ca81812a4aa28bab58ccda34ac05e4 Mon Sep 17 00:00:00 2001 From: Alexander Rossell Hayes Date: Thu, 30 Nov 2023 20:18:56 -0800 Subject: [PATCH 26/33] feat: add `determine_geo_id_names()` helper function --- R/utils_determine_geo_id_names.R | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 R/utils_determine_geo_id_names.R diff --git a/R/utils_determine_geo_id_names.R b/R/utils_determine_geo_id_names.R new file mode 100644 index 0000000..4df154a --- /dev/null +++ b/R/utils_determine_geo_id_names.R @@ -0,0 +1,10 @@ +determine_geo_id_names <- function(census.geo) { + switch( + census.geo, + "tract" = c("county", "tract"), + "block_group" = c("county", "tract", "block_group"), + "block" = c("county", "tract", "block"), + # Return `census.geo` unchanged for county, place, and zcta + census.geo + ) +} From 8762f0681f8a17e334a3203f4a0870f923d456e6 Mon Sep 17 00:00:00 2001 From: Alexander Rossell Hayes Date: Tue, 28 Nov 2023 13:32:41 -0800 Subject: [PATCH 27/33] feat(predict_race_new): add support for ZCTAs --- R/race_prediction_funs.R | 21 ++++++++------------- man/modfuns.Rd | 2 +- 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/R/race_prediction_funs.R b/R/race_prediction_funs.R index 3e43e0e..0c037a0 100644 --- a/R/race_prediction_funs.R +++ b/R/race_prediction_funs.R @@ -274,7 +274,7 @@ predict_race_new <- function( year = "2020", age = FALSE, sex = FALSE, - census.geo, + census.geo = c("tract", "block", "block_group", "county", "place", "zcta"), census.key = Sys.getenv("CENSUS_API_KEY"), name.dictionaries, surname.only=FALSE, @@ -284,7 +284,6 @@ predict_race_new <- function( census.surname = FALSE, use.counties = FALSE ) { - # Check years if (!(year %in% c("2000", "2010", "2020"))){ @@ -293,10 +292,9 @@ predict_race_new <- function( # Define 2020 race marginal race.margin <- c(r_whi=0.5783619, r_bla=0.1205021, r_his=0.1872988, r_asi=0.06106737, r_oth=0.05276981) - # check the geography - if (!missing(census.geo) && (census.geo == "precinct")) { - stop("Error: census_helper function does not currently support merging precinct-level data.") - } + + census.geo <- tolower(census.geo) + census.geo <- rlang::arg_match(census.geo) vars.orig <- names(voter.file) @@ -345,11 +343,8 @@ predict_race_new <- function( # check the geographies if (surname.only == FALSE) { - if (!(census.geo %in% c("county", "tract","block_group", "block", "place"))) { - stop("census.geo must be either 'county', 'tract', 'block', 'block_group', or 'place'") - } else { - message(paste("Proceeding with Census geographic data at", census.geo, "level...")) - } + message("Proceeding with Census geographic data at ", census.geo, " level...") + if (is.null(census.data)) { validate_key(census.key) message("Downloading Census geographic data using provided API key...") @@ -369,11 +364,11 @@ predict_race_new <- function( geo_id_names <- switch( census.geo, - "county" = c("county"), "tract" = c("county", "tract"), "block_group" = c("county", "tract", "block_group"), "block" = c("county", "tract", "block"), - "place" = c("place") + # Return `census.geo` unchanged for county, place, and zcta + census.geo ) if (!all(geo_id_names %in% names(voter.file))) { diff --git a/man/modfuns.Rd b/man/modfuns.Rd index d7d682d..403d2d2 100644 --- a/man/modfuns.Rd +++ b/man/modfuns.Rd @@ -31,7 +31,7 @@ predict_race_new( year = "2020", age = FALSE, sex = FALSE, - census.geo, + census.geo = c("tract", "block", "block_group", "county", "place", "zcta"), census.key = Sys.getenv("CENSUS_API_KEY"), name.dictionaries, surname.only = FALSE, From c256a4a0f6d31bc62cf73ab118ad068fe3e434d4 Mon Sep 17 00:00:00 2001 From: Alexander Rossell Hayes Date: Thu, 30 Nov 2023 20:18:56 -0800 Subject: [PATCH 28/33] geo_id_names new --- R/race_prediction_funs.R | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/R/race_prediction_funs.R b/R/race_prediction_funs.R index 0c037a0..805688f 100644 --- a/R/race_prediction_funs.R +++ b/R/race_prediction_funs.R @@ -362,14 +362,7 @@ predict_race_new <- function( } } - geo_id_names <- switch( - census.geo, - "tract" = c("county", "tract"), - "block_group" = c("county", "tract", "block_group"), - "block" = c("county", "tract", "block"), - # Return `census.geo` unchanged for county, place, and zcta - census.geo - ) + geo_id_names <- determine_geo_id_names(census.geo) if (!all(geo_id_names %in% names(voter.file))) { stop(message("To use",census.geo,"as census.geo, voter.file needs to include the following column(s):", From 03dfd544c45d5df714b781fe1a273af47916398a Mon Sep 17 00:00:00 2001 From: Alexander Rossell Hayes Date: Thu, 30 Nov 2023 20:37:35 -0800 Subject: [PATCH 29/33] feat(predict_race_me): add support for ZCTAs --- R/race_prediction_funs.R | 55 +++++++++++++++------------------------- man/modfuns.Rd | 2 +- 2 files changed, 21 insertions(+), 36 deletions(-) diff --git a/R/race_prediction_funs.R b/R/race_prediction_funs.R index 805688f..ff384f1 100644 --- a/R/race_prediction_funs.R +++ b/R/race_prediction_funs.R @@ -439,7 +439,7 @@ predict_race_me <- function( year = "2020", age = FALSE, sex = FALSE, - census.geo, + census.geo = c("tract", "block", "block_group", "county", "place", "zcta"), census.key = Sys.getenv("CENSUS_API_KEY"), name.dictionaries, surname.only = FALSE, @@ -451,7 +451,9 @@ predict_race_me <- function( race.init, ctrl ) { - + census.geo <- tolower(census.geo) + census.geo <- rlang::arg_match(census.geo) + if(!is.null(census.data)) { census_data_preflight(census.data, census.geo, year) } @@ -494,9 +496,6 @@ predict_race_me <- function( } ## Other quick checks... - if (!(census.geo %in% c("county", "tract","block_group", "block", "place"))) { - stop("census.geo must be either 'county', 'tract', 'block', 'block_group', or 'place'") - } stopifnot( all(!is.na(voter.file$surname)) ) @@ -526,15 +525,7 @@ predict_race_me <- function( ) ## level of geo estimation - geo_id_names <- c("state", switch(census.geo, - "county" = c("county"), - "tract" = c("county", "tract"), - "block_group" = c("county", "tract", "block_group"), - "block" = c("county", "tract", "block"), - "place" = c("place"), - "zipcode" = c("zipcode") - )) - + geo_id_names <- c("state", determine_geo_id_names(census.geo)) #race_pred_args[names(args_usr)] <- args_usr all_states <- unique(voter.file$state) @@ -546,31 +537,25 @@ predict_race_me <- function( if (ctrl$verbose) { message("Forming Pr(race | location) tables from census data...\n") } - if(year == "2020") { - vars_ <- c( - pop_white = 'P2_005N', pop_black = 'P2_006N', - pop_aian = 'P2_007N', pop_asian = 'P2_008N', - pop_nhpi = 'P2_009N', pop_other = 'P2_010N', - pop_two = 'P2_011N', pop_hisp = 'P2_002N' - ) - } else { - vars_ <- c( - pop_white = 'P005003', pop_black = 'P005004', - pop_aian = 'P005005', pop_asian = 'P005006', - pop_nhpi = 'P005007', pop_other = 'P005008', - pop_two = 'P005009', pop_hisp = 'P005010' - ) - } + + vars_ <- census_geo_api_names(year = year) + tmp_tabs <- lapply( census.data, function(x) { all_names <- names(x[[census.geo]]) - tmp <- x[[census.geo]][, c(geo_id_names, grep("P00|P2_0", all_names, value = TRUE))] - tmp$r_whi <- tmp[, vars_["pop_white"]] - tmp$r_bla <- tmp[, vars_["pop_black"]] - tmp$r_his <- tmp[, vars_["pop_hisp"]] - tmp$r_asi <- (tmp[, vars_["pop_asian"]] + tmp[, vars_["pop_nhpi"]]) - tmp$r_oth <- (tmp[, vars_["pop_aian"]] + tmp[, vars_["pop_other"]] + tmp[, vars_["pop_two"]]) + + if (any(c("P2_005N", "P005003") %in% all_names)) { + vars_ <- census_geo_api_names_legacy(year = year) + } + + tmp <- x[[census.geo]][, c(geo_id_names, grep("^P[0-2]", all_names, value = TRUE))] + + for (i in seq_along(vars_)) { + tmp[[names(vars_)[[i]]]] <- + rowSums(tmp[, vars_[[i]], drop = FALSE]) + } + all_names <- names(tmp) ## Totals tmp_la <- tmp[, c(geo_id_names, grep("^r_", all_names, value = TRUE))] diff --git a/man/modfuns.Rd b/man/modfuns.Rd index 403d2d2..f852a26 100644 --- a/man/modfuns.Rd +++ b/man/modfuns.Rd @@ -48,7 +48,7 @@ predict_race_me( year = "2020", age = FALSE, sex = FALSE, - census.geo, + census.geo = c("tract", "block", "block_group", "county", "place", "zcta"), census.key = Sys.getenv("CENSUS_API_KEY"), name.dictionaries, surname.only = FALSE, From 633339826db86c4069d660afe8aeabe6d6b541cd Mon Sep 17 00:00:00 2001 From: Alexander Rossell Hayes Date: Thu, 30 Nov 2023 20:41:05 -0800 Subject: [PATCH 30/33] fix(validate_key): if `key` is `NULL`, replace it with `Sys.getenv("CENSUS_API_KEY")` for backwards compatibility --- NAMESPACE | 1 + R/census_geo_api.R | 2 +- R/census_helper.R | 2 +- R/census_helper_v2.R | 2 +- R/get_census_data.R | 2 +- R/predict_race.R | 2 +- R/race_prediction_funs.R | 8 ++++---- R/utils_validate_key.R | 3 +++ 8 files changed, 13 insertions(+), 9 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index c6721d1..8b7120c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -10,6 +10,7 @@ importFrom(dplyr,pull) importFrom(furrr,future_map_dfr) importFrom(piggyback,pb_download) importFrom(purrr,map_dfr) +importFrom(rlang,"%||%") importFrom(stats,rmultinom) importFrom(utils,setTxtProgressBar) importFrom(utils,txtProgressBar) diff --git a/R/census_geo_api.R b/R/census_geo_api.R index 2c505e7..6b64854 100644 --- a/R/census_geo_api.R +++ b/R/census_geo_api.R @@ -58,7 +58,7 @@ census_geo_api <- function( save_temp = NULL, counties = NULL ) { - validate_key(key) + key <- validate_key(key) geo <- tolower(geo) geo <- rlang::arg_match(geo) diff --git a/R/census_helper.R b/R/census_helper.R index f7c2816..9068663 100644 --- a/R/census_helper.R +++ b/R/census_helper.R @@ -89,7 +89,7 @@ census_helper <- function( } if (toDownload) { - validate_key(key) + key <- validate_key(key) } states <- toupper(states) diff --git a/R/census_helper_v2.R b/R/census_helper_v2.R index 309a538..eb5873d 100644 --- a/R/census_helper_v2.R +++ b/R/census_helper_v2.R @@ -88,7 +88,7 @@ census_helper_new <- function( } if (toDownload) { - validate_key(key) + key <- validate_key(key) } if (toupper(states) == "ALL") { diff --git a/R/get_census_data.R b/R/get_census_data.R index d4a4d09..9eb735d 100644 --- a/R/get_census_data.R +++ b/R/get_census_data.R @@ -49,7 +49,7 @@ get_census_data <- function( retry = 3, county.list = NULL ) { - validate_key(key) + key <- validate_key(key) census.geo <- tolower(census.geo) census.geo <- rlang::arg_match(census.geo) diff --git a/R/predict_race.R b/R/predict_race.R index bd8df9a..ed7e903 100644 --- a/R/predict_race.R +++ b/R/predict_race.R @@ -193,7 +193,7 @@ predict_race <- function( if (surname.only == FALSE && is.null(census.data)) { # Otherwise predict_race_new and predict_race_me will both # attempt to pull census_data - validate_key(census.key) + census.key <- validate_key(census.key) voter.file$state <- toupper(voter.file$state) states <- unique(voter.file$state) county.list <- split(voter.file$county, voter.file$state) diff --git a/R/race_prediction_funs.R b/R/race_prediction_funs.R index ff384f1..3184524 100644 --- a/R/race_prediction_funs.R +++ b/R/race_prediction_funs.R @@ -87,7 +87,7 @@ NULL message(paste("Proceeding with Census geographic data at", census.geo, "level...")) } if (missing(census.data) || is.null(census.data) || is.na(census.data)) { - validate_key(census.key) + census.key <- validate_key(census.key) message("Downloading Census geographic data using provided API key...") } else { if (!("state" %in% names(voter.file))) { @@ -95,7 +95,7 @@ NULL } if (sum(toupper(unique(as.character(voter.file$state))) %in% toupper(names(census.data)) == FALSE) > 0) { message("census.data object does not include all states in voter.file object.") - validate_key(census.key) + census.key <- validate_key(census.key) message("Downloading Census geographic data for states not included in census.data object...") } else { message("Using Census geographic data from provided census.data object...") @@ -346,7 +346,7 @@ predict_race_new <- function( message("Proceeding with Census geographic data at ", census.geo, " level...") if (is.null(census.data)) { - validate_key(census.key) + census.key <- validate_key(census.key) message("Downloading Census geographic data using provided API key...") } else { if (!("state" %in% names(voter.file))) { @@ -355,7 +355,7 @@ predict_race_new <- function( census_data_preflight(census.data, census.geo, year) if (sum(toupper(unique(as.character(voter.file$state))) %in% toupper(names(census.data)) == FALSE) > 0) { message("census.data object does not include all states in voter.file object.") - validate_key(census.key) + census.key <- validate_key(census.key) message("Downloading Census geographic data for states not included in census.data object...") } else { message("Using Census geographic data from provided census.data object...") diff --git a/R/utils_validate_key.R b/R/utils_validate_key.R index b31a6fc..a92d447 100644 --- a/R/utils_validate_key.R +++ b/R/utils_validate_key.R @@ -1,8 +1,11 @@ +#' @importFrom rlang %||% validate_key <- function( key, argument_name = rlang::caller_arg(key), call = rlang::caller_call() ) { + key <- key %||% Sys.getenv("CENSUS_API_KEY") + if (length(key) != 1) { cli::cli_abort( c( From bcd0f4562dfd6b4ca8caf41bada7a6ddd6a57423 Mon Sep 17 00:00:00 2001 From: Alexander Rossell Hayes Date: Thu, 30 Nov 2023 21:22:18 -0800 Subject: [PATCH 31/33] test(get_census_data): add test with ZCTAs --- tests/testthat/test-get_census_data.R | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/testthat/test-get_census_data.R b/tests/testthat/test-get_census_data.R index 89e7cb9..f2e2aa9 100644 --- a/tests/testthat/test-get_census_data.R +++ b/tests/testthat/test-get_census_data.R @@ -55,4 +55,13 @@ if(Sys.getenv("CENSUS_API_KEY") != "") { expect_true(all(r$RI$place$state == "RI")) }) + test_that("Census ZCTA download works", { + r <- suppressMessages(get_census_data( + key = NULL, + state = "DC", + census.geo = "zcta" + )) + expect_named(r$DC, c("state", "age", "sex", "year", "zcta")) + expect_true(all(r$DC$zcta$state == "DC")) + }) } From b374610adbeeab0e54000255510eae717f2109ce Mon Sep 17 00:00:00 2001 From: Alexander Rossell Hayes Date: Thu, 30 Nov 2023 21:29:37 -0800 Subject: [PATCH 32/33] chore: add `.lazytest` to `.gitignore` (see https://lazytest.cynkra.com/) --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index dc83fda..0f2e051 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,7 @@ # RStudio files .Rproj.user/ .Rproj +.lazytest # produced vignettes vignettes/*.html From ec10a952d558879acdeff03be683647adb1ce869 Mon Sep 17 00:00:00 2001 From: Alexander Rossell Hayes Date: Thu, 30 Nov 2023 21:32:55 -0800 Subject: [PATCH 33/33] fix(census_helper_new): pass `year` to `census_geo_api()` --- R/census_helper_v2.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/census_helper_v2.R b/R/census_helper_v2.R index eb5873d..44b7ae4 100644 --- a/R/census_helper_v2.R +++ b/R/census_helper_v2.R @@ -111,7 +111,7 @@ census_helper_new <- function( # Only those counties within the target state counties = unique(voter.file$county[voter.file$state == state])) } else { - census <- census_geo_api(key, state, geo = "tract", age, sex, retry) + census <- census_geo_api(key, state, geo = "tract", age, sex, year, retry) } } else { census <- census.data[[toupper(state)]]$tract @@ -124,7 +124,7 @@ census_helper_new <- function( # Only those counties within the target state counties = unique(voter.file$county[voter.file$state == state])) } else { - census <- census_geo_api(key, state, geo = "block_group", age, sex, retry) + census <- census_geo_api(key, state, geo = "block_group", age, sex, year, retry) } } else { @@ -143,7 +143,7 @@ census_helper_new <- function( # Only those counties within the target state counties = unique(voter.file$county[voter.file$state == state])) } else { - census <- census_geo_api(key, state, geo = "block", age, sex, retry) + census <- census_geo_api(key, state, geo = "block", age, sex, year, retry) } } else { @@ -159,7 +159,7 @@ census_helper_new <- function( census.data[[state]]$sex != FALSE if (state_must_be_downloaded) { - census <- census_geo_api(key, state, geo = geo, age, sex, retry) + census <- census_geo_api(key, state, geo = geo, age, sex, year, retry) } else { census <- census.data[[state]][[geo]] }