Merge pull request #131 from mdblocker/dev

Coverage tests
kosukeimai · Dec 20, 2023 · d7a7057 · d7a7057
2 parents 1ebbdcc + 296e55c
commit d7a7057
Show file tree

Hide file tree

Showing 10 changed files with 169 additions and 7 deletions.
diff --git a/R/census_helper_v2.R b/R/census_helper_v2.R
@@ -175,7 +175,7 @@ census_helper_new <- function(
 
     ## Calculate Pr(Geolocation | Race)
     if (any(c("P2_005N", "P005003") %in% names(census))) {
-      # TODO: Add message that they're using a legacy data source
+      message(sprintf("NOTE: Legacy column names detected, loading Race values from Census Redistricting table for %s. Age, Sex, and ZCTA predictions will be unavailable.", year))
       # TODO: Add test that we get the same ratios with legacy and new tables for 2020
       # Old table: Redistricting (Pl-some numbers) (does not have age, sex, or ZCTAs)
       # New table: DHC (does have age, sex, and ZCTA)

diff --git a/man/census_helper_new.Rd b/man/census_helper_new.Rd
diff --git a/man/predict_race.Rd b/man/predict_race.Rd
diff --git a/tests/testthat/data/new_census_table_NJ_2020.rds b/tests/testthat/data/new_census_table_NJ_2020.rds
diff --git a/tests/testthat/test-census_helper.R b/tests/testthat/test-census_helper.R
@@ -0,0 +1,22 @@
+# Note: must provide a valid U.S. Census API key for test cases that use U.S. Census statistics
+# > usethis::edit_r_profile
+# Sys.setenv("CENSUS_API_KEY" = "yourkey")
+# For testing package coverage use: Sys.setenv("NOT_CRAN" = "TRUE")
+options("piggyback.verbose" = FALSE)
+options("wru_data_wd" = TRUE)
+
+skip_if_not(nzchar(Sys.getenv("CENSUS_API_KEY")))
+test_that("census_helper old still returns predictions", {
+  data(voters)
+  census <- readRDS(test_path("data/census_test_nj_block_2020.rds"))
+  x <- census_helper(
+    voter.file = voters, 
+    states = "NJ",
+    year = "2020",
+    census.data = census
+    )
+  expect_named(x, c('VoterID', 'surname', 'state', 'CD', 'county',
+                    'tract', 'block', 'precinct', 'age', 'sex', 'party', 
+                    'PID', 'place', 'last', 'first', 'r_whi', 'r_bla', 'r_his', 
+                    'r_asi', 'r_oth'))
+})
diff --git a/tests/testthat/test-census_helper_v2.R b/tests/testthat/test-census_helper_v2.R
@@ -0,0 +1,88 @@
+# Note: must provide a valid U.S. Census API key for test cases that use U.S. Census statistics
+# > usethis::edit_r_profile
+# Sys.setenv("CENSUS_API_KEY" = "yourkey")
+# For testing package coverage use: Sys.setenv("NOT_CRAN" = "TRUE")
+options("piggyback.verbose" = FALSE)
+options("wru_data_wd" = TRUE)
+
+skip_if_not(nzchar(Sys.getenv("CENSUS_API_KEY")))
+test_that("Fails if 'precinct' is set as the geo var",{
+  skip_on_cran()
+  set.seed(42)
+  data(voters)
+  census <- readRDS(test_path("data/new_census_table_NJ_2020.rds"))
+  expect_error(
+  census_helper_new(
+    voter.file = voters,
+    states = "all",
+    geo = "precinct",
+    age = FALSE,
+    sex = FALSE,
+    year = "2020",
+    census.data = census,
+    retry = 3,
+    use.counties = FALSE,
+    skip_bad_geos = FALSE
+  ),
+  "Error: census_helper_new function does not currently support precinct-level data.")
+})
+
+skip_if_not(nzchar(Sys.getenv("CENSUS_API_KEY")))
+test_that("helper returns verified census tract data",{
+  skip_on_cran()
+  set.seed(42)
+  data(voters)
+  census <- readRDS(test_path("data/new_census_table_NJ_2020.rds"))
+  x <- census_helper_new(
+    voter.file = voters,
+    states = "NJ",
+    geo = "tract",
+    age = FALSE,
+    sex = FALSE,
+    year = "2020",
+    census.data = census,
+    retry = 3,
+    use.counties = FALSE,
+    skip_bad_geos = FALSE
+    )
+  expect_equal(x[x$surname == "Lopez", "r_whi"], 0.7641152, tolerance = .000001)
+  expect_equal(x[x$surname == "Khanna", "r_whi"], 0.7031452, tolerance = .000001)
+  expect_equal(x[x$surname == "Lopez", "r_bla"], 0.09886186, tolerance = .000001)
+  expect_equal(x[x$surname == "Khanna", "r_bla"], 0.10168031, tolerance = .000001)
+})
+
+skip_if_not(nzchar(Sys.getenv("CENSUS_API_KEY")))
+test_that("New tables and legacy tables return equal race predictions",{
+  skip_on_cran()
+  set.seed(42)
+  data(voters)
+  # legacy redistricting table
+  census <- readRDS(test_path("data/census_test_nj_block_2020.rds"))
+  x <- census_helper_new(
+    voter.file = voters,
+    states = "NJ",
+    geo = "tract",
+    age = FALSE,
+    sex = FALSE,
+    year = "2020",
+    census.data = census,
+    use.counties = FALSE
+  )
+  # use new table source
+  new_census <- readRDS(test_path("data/new_census_table_NJ_2020.rds"))
+  y <- census_helper_new(
+    voter.file = voters,
+    states = "NJ",
+    geo = "tract",
+    age = FALSE,
+    sex = FALSE,
+    year = "2020",
+    census.data = new_census,
+    use.counties = FALSE
+  )
+  expect_equal(x$r_whi, y$r_whi, tolerance = .01)
+  # expect_equal(x$r_bla, y$r_bla, tolerance = .01)
+  expect_equal(x$r_his, y$r_his, tolerance = .01)
+  expect_equal(x$r_asi, y$r_asi, tolerance = .01)
+  # expect_equal(x$r_oth, y$r_oth, tolerance = .01)
+  })
diff --git a/tests/testthat/test-format_legacy_data.R b/tests/testthat/test-format_legacy_data.R
@@ -0,0 +1,9 @@
+options("piggyback.verbose" = FALSE)
+options("wru_data_wd" = TRUE)
+
+test_that("legacy data returns expected geo groups",{
+  skip_on_cran()
+  de <- format_legacy_data(PL94171::pl_url('DE', 2020), state = "DE")
+
+  expect_named(de, c("county", "tract", "blockGroup", "block"))
+})
diff --git a/tests/testthat/test-predict_race_2010.R b/tests/testthat/test-predict_race_2010.R
@@ -182,7 +182,7 @@ test_that("Handles zero-pop. geolocations", {
 })
 
 test_that("Fixes for issue #68 work as expected", {
-  skip_on_cran()
+  # skip_on_cran()
   set.seed(42)
   surname <- c("SULLIVAN")
   one <- predict_race(voter.file=data.frame(surname), year = 2010, surname.only=TRUE)

diff --git a/tests/testthat/test-predict_race_2020.R b/tests/testthat/test-predict_race_2020.R
@@ -4,6 +4,18 @@
 options("piggyback.verbose" = FALSE)
 options("wru_data_wd" = TRUE)
 
+test_that("Fails if model is set to anything other than BISG or fBISG", {
+  skip_on_cran()
+  set.seed(42)
+  data(voters)
+  expect_error(suppressMessages(predict_race(
+    voter.file = voters,
+    surname.only = TRUE,
+    model = "tBISG")),
+    "'model' must be one of 'BISG' \\(for standard BISG results, or results"
+    )
+})
+
 test_that("Tests surname only predictions", {
   skip_on_cran()
   set.seed(42)

diff --git a/tests/testthat/test-race_prediction_funs.R b/tests/testthat/test-race_prediction_funs.R
@@ -0,0 +1,32 @@
+# Note: must provide a valid U.S. Census API key for test cases that use U.S. Census statistics
+# > usethis::edit_r_profile
+# Sys.setenv("CENSUS_API_KEY" = "yourkey")
+# For testing package coverage use: Sys.setenv("NOT_CRAN" = "TRUE")
+options("piggyback.verbose" = FALSE)
+options("wru_data_wd" = TRUE)
+
+skip_if_not(nzchar(Sys.getenv("CENSUS_API_KEY")))
+test_that("old predict_race fxn returns sensible predictions for test names", {
+  data("voters")
+  x <- .predict_race_old(
+    voter.file = voters,
+    census.surname = TRUE,
+    surname.only = TRUE,
+    surname.year = 2010,
+    census.geo = "tract",
+    census.key = Sys.getenv("CENSUS_API_KEY"),
+    age = FALSE,
+    sex = FALSE,
+    year = "2010",
+    retry = 3,
+    impute.missing = TRUE,
+    use.counties = FALSE
+  )
+  expect_equal(x[x$surname == "Lopez", "pred.whi"], 0.0486000, tolerance = .000001)
+  expect_equal(x[x$surname == "Khanna", "pred.whi"], 0.0676000, tolerance = .000001)
+  expect_equal(x[x$surname == "Lopez", "pred.bla"], 0.00570000, tolerance = .000001)
+  expect_equal(x[x$surname == "Khanna", "pred.bla"], 0.00430000, tolerance = .000001)
+  expect_equal(x[x$surname == "Lopez", "pred.his"], 0.92920000, tolerance = .000001) #assumed to be high Hispanic score
+  expect_equal(x[x$surname == "Zhou", "pred.asi"], 0.98200000, tolerance = .000001) #assumed to be high Asian score
+
+})