diff --git a/R/census_geo_api.R b/R/census_geo_api.R index 80cc903..9de3093 100644 --- a/R/census_geo_api.R +++ b/R/census_geo_api.R @@ -20,6 +20,7 @@ #' sex or not. If \code{FALSE} (default), function will return Pr(Geolocation | Race). #' If \code{TRUE}, function will return Pr(Geolocation, Sex | Race). #' If \code{\var{age}} is also \code{TRUE}, function will return Pr(Geolocation, Age, Sex | Race). +#' @param retry The number of retries at the census website if network interruption occurs. #' @return Output will be an object of class \code{list}, indexed by state names. It will #' consist of the original user-input data with additional columns of Census geographic data. #' @@ -33,7 +34,7 @@ #' available \href{http://rstudio-pubs-static.s3.amazonaws.com/19337_2e7f827190514c569ea136db788ce850.html}{here}. #' #' @export -census_geo_api <- function(key, state, geo = "tract", age = FALSE, sex = FALSE) { +census_geo_api <- function(key, state, geo = "tract", age = FALSE, sex = FALSE, retry = 0) { if (missing(key)) { stop('Must enter U.S. Census API key, which can be requested at http://api.census.gov/data/key_signup.html.') @@ -81,8 +82,7 @@ census_geo_api <- function(key, state, geo = "tract", age = FALSE, sex = FALSE) if (geo == "county") { geo.merge <- c("state", "county") region <- paste("for=county:*&in=state:", state.fips, sep = "") - census <- get_census_api("http://api.census.gov/data/2010/sf1?", - key = key, vars = vars, region = region) + census <- get_census_api("http://api.census.gov/data/2010/sf1?", key = key, vars = vars, region = region, retry) } if (geo == "tract") { @@ -90,15 +90,14 @@ census_geo_api <- function(key, state, geo = "tract", age = FALSE, sex = FALSE) geo.merge <- c("state", "county", "tract") region_county <- paste("for=county:*&in=state:", state.fips, sep = "") - county_df <- get_census_api("http://api.census.gov/data/2010/sf1?", key = key, vars = vars, region = region_county) + county_df <- get_census_api("http://api.census.gov/data/2010/sf1?", key = key, vars = vars, region = region_county, retry) county_list <- county_df$county census <- NULL for (c in 1:length(county_list)) { print(paste("County ", c, " of ", length(county_list), ": ", county_list[c], sep = "")) region_county <- paste("for=tract:*&in=state:", state.fips, "+county:", county_list[c], sep = "") - census.temp <- get_census_api("http://api.census.gov/data/2010/sf1?", - key = key, vars = vars, region = region_county) + census.temp <- get_census_api("http://api.census.gov/data/2010/sf1?", key = key, vars = vars, region = region_county, retry) census <- rbind(census, census.temp) } rm(census.temp) @@ -109,7 +108,7 @@ census_geo_api <- function(key, state, geo = "tract", age = FALSE, sex = FALSE) geo.merge <- c("state", "county", "tract", "block") region_county <- paste("for=county:*&in=state:", state.fips, sep = "") - county_df <- get_census_api("http://api.census.gov/data/2010/sf1?", key = key, vars = vars, region = region_county) + county_df <- get_census_api("http://api.census.gov/data/2010/sf1?", key = key, vars = vars, region = region_county, retry) county_list <- county_df$county census <- NULL @@ -119,16 +118,14 @@ census_geo_api <- function(key, state, geo = "tract", age = FALSE, sex = FALSE) region_tract <- paste("for=tract:*&in=state:", state.fips, "+county:", county_list[c], sep = "") print(region_tract) - tract_df <- get_census_api("http://api.census.gov/data/2010/sf1?", - key = key, vars = vars, region = region_tract) + tract_df <- get_census_api("http://api.census.gov/data/2010/sf1?", key = key, vars = vars, region = region_tract, retry) tract_list <- tract_df$tract for (t in 1:length(tract_list)) { print(paste("Tract ", t, " of ", length(tract_list), ": ", tract_list[t], sep = "")) region_block <- paste("for=block:*&in=state:", state.fips, "+county:", county_list[c], "+tract:", tract_list[t], sep = "") - census.temp <- get_census_api("http://api.census.gov/data/2010/sf1?", - key = key, vars = vars, region = region_block) + census.temp <- get_census_api("http://api.census.gov/data/2010/sf1?", key = key, vars = vars, region = region_block, retry) census <- rbind(census, census.temp) } } diff --git a/R/census_helper.R b/R/census_helper.R index 2e12c20..7321cd5 100644 --- a/R/census_helper.R +++ b/R/census_helper.R @@ -35,6 +35,7 @@ #' \code{FALSE} in both). Similarly, the \code{\var{sex}} element in the object provided in #' \code{\var{census.data}} must have the same value as the \code{\var{sex}} option here. #' If \code{\var{census.data}} is missing, Census geographic data will be obtained via Census API. +#' @param retry The number of retries at the census website if network interruption occurs. #' @return Output will be an object of class \code{data.frame}. It will #' consist of the original user-input data with additional columns of #' Census data. @@ -45,7 +46,7 @@ #' \dontrun{census_helper(key = "...", voter.file = voters, states = "all", geo = "tract", age = TRUE, sex = TRUE)} #' #' @export -census_helper <- function(key, voter.file, states = "all", geo = "tract", age = FALSE, sex = FALSE, census.data = NA) { +census_helper <- function(key, voter.file, states = "all", geo = "tract", age = FALSE, sex = FALSE, census.data = NA, retry = 0) { if (is.na(census.data) || (typeof(census.data) != "list")) { toDownload = TRUE @@ -74,7 +75,7 @@ census_helper <- function(key, voter.file, states = "all", geo = "tract", age = if (geo == "county") { geo.merge <- c("state", "county") if ((toDownload) || (is.null(census.data[[state]])) || (census.data[[state]]$age != age) || (census.data[[state]]$sex != sex)) { - census <- census_geo_api(key, state, geo = "county", age, sex) + census <- census_geo_api(key, state, geo = "county", age, sex, retry) } else { census <- census.data[[state]]$county } @@ -83,7 +84,7 @@ census_helper <- function(key, voter.file, states = "all", geo = "tract", age = if (geo == "tract") { geo.merge <- c("state", "county", "tract") if ((toDownload) || (is.null(census.data[[state]])) || (census.data[[state]]$age != age) || (census.data[[state]]$sex != sex)) { - census <- census_geo_api(key, state, geo = "tract", age, sex) + census <- census_geo_api(key, state, geo = "tract", age, sex, retry) } else { census <- census.data[[state]]$tract } @@ -92,7 +93,7 @@ census_helper <- function(key, voter.file, states = "all", geo = "tract", age = if (geo == "block") { geo.merge <- c("state", "county", "tract", "block") if ((toDownload) || (is.null(census.data[[state]])) || (census.data[[state]]$age != age) || (census.data[[state]]$sex != sex)) { - census <- census_geo_api(key, state, geo = "block", age, sex) + census <- census_geo_api(key, state, geo = "block", age, sex, retry) } else { census <- census.data[[state]]$block } diff --git a/R/get_census_api.R b/R/get_census_api.R index 63ac9d8..c982a03 100644 --- a/R/get_census_api.R +++ b/R/get_census_api.R @@ -16,6 +16,7 @@ #' @param region Character object specifying which region to obtain data for. #' Must contain "for" and possibly "in", #' e.g., \code{"for=block:1213&in=state:47+county:015+tract:*"}. +#' @param retry The number of retries at the census website if network interruption occurs. #' @return If successful, output will be an object of class \code{data.frame}. #' If unsuccessful, function prints the URL query that caused the error. #' @@ -28,15 +29,15 @@ #' \href{http://rstudio-pubs-static.s3.amazonaws.com/19337_2e7f827190514c569ea136db788ce850.html}{here}. #' #' @export -get_census_api <- function(data_url, key, vars, region) { +get_census_api <- function(data_url, key, vars, region, retry = 0) { if(length(vars) > 50){ vars <- vec_to_chunk(vars) # Split variables into a list get <- lapply(vars, function(x) paste(x, sep='', collapse=",")) - data <- lapply(vars, function(x) get_census_api_2(data_url,key, x, region)) + data <- lapply(vars, function(x) get_census_api_2(data_url,key, x, region, retry)) } else { get <- paste(vars, sep='', collapse=',') - data <- list(get_census_api_2(data_url, key, get, region)) + data <- list(get_census_api_2(data_url, key, get, region, retry)) } ## Format output. If there were no errors, than paste the data together. If there is an error, just return the unformatted list. diff --git a/R/get_census_api_2.R b/R/get_census_api_2.R index 4c7d399..392f57b 100644 --- a/R/get_census_api_2.R +++ b/R/get_census_api_2.R @@ -17,6 +17,7 @@ #' @param region Character object specifying which region to obtain data for. #' Must contain "for" and possibly "in", #' e.g., \code{"for=block:1213&in=state:47+county:015+tract:*"}. +#' @param retry The number of retries at the census website if network interruption occurs. #' @return If successful, output will be an object of class \code{data.frame}. #' If unsuccessful, function prints the URL query that was constructed. #' @@ -29,23 +30,20 @@ #' \href{http://rstudio-pubs-static.s3.amazonaws.com/19337_2e7f827190514c569ea136db788ce850.html}{here}. #' #' @export -get_census_api_2 <- function(data_url, key, get, region){ +get_census_api_2 <- function(data_url, key, get, region, retry = 0){ if(length(get)>1) { get <- paste(get, collapse=',', sep='') } api_call <- paste(data_url, 'key=', key, '&get=', get, '&', region, sep='') dat_raw <- try(readLines(api_call, warn="F")) -# -# The following will pause and retry the census website access. But we are not using it right now. -# -# retry = 5 -# while ((class(dat_raw) == 'try-error') && (retry > 0)) { -# print(paste("Try census server again:", data_url)) -# Sys.sleep(1) -# retry <- retry - 1 -# dat_raw <- try(readLines(api_call, warn="F")) -# } -# + + while ((class(dat_raw) == 'try-error') && (retry > 0)) { + print(paste("Try census server again:", data_url)) + Sys.sleep(1) + retry <- retry - 1 + dat_raw <- try(readLines(api_call, warn="F")) + } + if(class(dat_raw) == 'try-error') { print("Data access failure at the census website, please try again by re-run the previous command") stop(print(api_call)) diff --git a/R/get_census_data.R b/R/get_census_data.R index 9f2c61c..e174288 100644 --- a/R/get_census_data.R +++ b/R/get_census_data.R @@ -18,6 +18,7 @@ #' @param census.geo An optional character vector specifying what level of #' geography to use to merge in U.S. Census 2010 geographic data. Currently #' \code{"county"}, \code{"tract"}, or \code{"block"} are supported. +#' @param retry The number of retries at the census website if network interruption occurs. #' @return Output will be an object of class \code{list} indexed by state. #' Output will contain the following elements: \code{state}, \code{age}, \code{sex}, #' \code{county}, \code{tract} and \code{block}. @@ -25,7 +26,7 @@ #' @export #' #' @examples \dontrun{get_census_data(key = "...", states = c("NJ", "NY"), age = TRUE, sex = FALSE)} -get_census_data <- function(key, states, age = FALSE, sex = FALSE, census.geo = "block") { +get_census_data <- function(key, states, age = FALSE, sex = FALSE, census.geo = "block", retry = 0) { if (missing(key)) { stop('Must enter valid Census API key, which can be requested at http://api.census.gov/data/key_signup.html.') @@ -37,15 +38,15 @@ get_census_data <- function(key, states, age = FALSE, sex = FALSE, census.geo = for (s in states) { CensusObj[[s]] <- list(state = s, age = age, sex = sex) if (census.geo == "block") { - block <- census_geo_api(key, s, geo = "block", age, sex) + block <- census_geo_api(key, s, geo = "block", age, sex, retry) CensusObj[[s]]$block <- block } if ((census.geo == "block") || (census.geo == "tract")) { - tract <- census_geo_api(key, s, geo = "tract", age, sex) + tract <- census_geo_api(key, s, geo = "tract", age, sex, retry) CensusObj[[s]]$tract <- tract } if ((census.geo == "block") || (census.geo == "tract") || (census.geo == "county")) { - county <- census_geo_api(key, s, geo = "county", age, sex) + county <- census_geo_api(key, s, geo = "county", age, sex, retry) CensusObj[[s]]$county <- county } } diff --git a/R/predict_race.R b/R/predict_race.R index a4b7921..8a927eb 100644 --- a/R/predict_race.R +++ b/R/predict_race.R @@ -67,6 +67,7 @@ #' on individual's party registration (in addition to geolocation). #' Whatever the name of the party registration field in \code{\var{voter.file}}, #' it should be coded as 1 for Democrat, 2 for Republican, and 0 for Other. +#' @param retry The number of retries at the census website if network interruption occurs. #' @return Output will be an object of class \code{data.frame}. It will #' consist of the original user-input data with additional columns with #' predicted probabilities for each of the five major racial categories: @@ -91,7 +92,7 @@ ## Race Prediction Function predict_race <- function(voter.file, census.surname = TRUE, surname.only = FALSE, surname.year = 2010, - census.geo, census.key, census.data = NA, age = FALSE, sex = FALSE, party) { + census.geo, census.key, census.data = NA, age = FALSE, sex = FALSE, party, retry = 0) { if (!missing(census.geo) && (census.geo == "precinct")) { # geo <- "precinct" @@ -181,7 +182,7 @@ predict_race <- function(voter.file, geo = "block", age = age, sex = sex, - census.data = census.data) + census.data = census.data, retry = retry) } if (census.geo == "precinct") { @@ -199,7 +200,7 @@ predict_race <- function(voter.file, geo = "tract", age = age, sex = sex, - census.data = census.data) + census.data = census.data, retry = retry) } if (census.geo == "county") { @@ -212,7 +213,7 @@ predict_race <- function(voter.file, geo = "county", age = age, sex = sex, - census.data = census.data) + census.data = census.data, retry = retry) } ## Pr(Race | Surname, Geolocation) diff --git a/man/census_geo_api.Rd b/man/census_geo_api.Rd index 55898fe..792810f 100644 --- a/man/census_geo_api.Rd +++ b/man/census_geo_api.Rd @@ -4,7 +4,8 @@ \alias{census_geo_api} \title{Census Data download function.} \usage{ -census_geo_api(key, state, geo = "tract", age = FALSE, sex = FALSE) +census_geo_api(key, state, geo = "tract", age = FALSE, sex = FALSE, + retry = 0) } \arguments{ \item{key}{A required character object. Must contain user's Census API @@ -26,6 +27,8 @@ If \code{\var{sex}} is also \code{TRUE}, function will return Pr(Geolocation, Ag sex or not. If \code{FALSE} (default), function will return Pr(Geolocation | Race). If \code{TRUE}, function will return Pr(Geolocation, Sex | Race). If \code{\var{age}} is also \code{TRUE}, function will return Pr(Geolocation, Age, Sex | Race).} + +\item{retry}{The number of retries at the census website if network interruption occurs.} } \value{ Output will be an object of class \code{list}, indexed by state names. It will diff --git a/man/census_helper.Rd b/man/census_helper.Rd index ce0483a..6a2dc5f 100644 --- a/man/census_helper.Rd +++ b/man/census_helper.Rd @@ -5,7 +5,7 @@ \title{Census helper function.} \usage{ census_helper(key, voter.file, states = "all", geo = "tract", age = FALSE, - sex = FALSE, census.data = NA) + sex = FALSE, census.data = NA, retry = 0) } \arguments{ \item{key}{A required character object. Must contain user's Census API @@ -42,6 +42,8 @@ as the \code{\var{age}} option specified in this function (i.e., \code{TRUE} in \code{FALSE} in both). Similarly, the \code{\var{sex}} element in the object provided in \code{\var{census.data}} must have the same value as the \code{\var{sex}} option here. If \code{\var{census.data}} is missing, Census geographic data will be obtained via Census API.} + +\item{retry}{The number of retries at the census website if network interruption occurs.} } \value{ Output will be an object of class \code{data.frame}. It will diff --git a/man/get_census_api.Rd b/man/get_census_api.Rd index 3ddec0d..bb94659 100644 --- a/man/get_census_api.Rd +++ b/man/get_census_api.Rd @@ -4,7 +4,7 @@ \alias{get_census_api} \title{Census API function.} \usage{ -get_census_api(data_url, key, vars, region) +get_census_api(data_url, key, vars, region, retry = 0) } \arguments{ \item{data_url}{URL root of the API, including the question mark, @@ -21,6 +21,8 @@ split variables into separate queries.} \item{region}{Character object specifying which region to obtain data for. Must contain "for" and possibly "in", e.g., \code{"for=block:1213&in=state:47+county:015+tract:*"}.} + +\item{retry}{The number of retries at the census website if network interruption occurs.} } \value{ If successful, output will be an object of class \code{data.frame}. diff --git a/man/get_census_api_2.Rd b/man/get_census_api_2.Rd index fffc34e..947330c 100644 --- a/man/get_census_api_2.Rd +++ b/man/get_census_api_2.Rd @@ -4,7 +4,7 @@ \alias{get_census_api_2} \title{Census API URL assembler.} \usage{ -get_census_api_2(data_url, key, get, region) +get_census_api_2(data_url, key, get, region, retry = 0) } \arguments{ \item{data_url}{URL root of the API, including the question mark, @@ -21,6 +21,8 @@ split variables into separate queries.} \item{region}{Character object specifying which region to obtain data for. Must contain "for" and possibly "in", e.g., \code{"for=block:1213&in=state:47+county:015+tract:*"}.} + +\item{retry}{The number of retries at the census website if network interruption occurs.} } \value{ If successful, output will be an object of class \code{data.frame}. diff --git a/man/get_census_data.Rd b/man/get_census_data.Rd index ced20a4..b12f313 100644 --- a/man/get_census_data.Rd +++ b/man/get_census_data.Rd @@ -5,7 +5,7 @@ \title{Multilevel Census data download function.} \usage{ get_census_data(key, states, age = FALSE, sex = FALSE, - census.geo = "block") + census.geo = "block", retry = 0) } \arguments{ \item{key}{A required character object containing a valid Census API key, @@ -26,6 +26,8 @@ If \code{\var{age}} is also \code{TRUE}, function will return Pr(Geolocation, Ag \item{census.geo}{An optional character vector specifying what level of geography to use to merge in U.S. Census 2010 geographic data. Currently \code{"county"}, \code{"tract"}, or \code{"block"} are supported.} + +\item{retry}{The number of retries at the census website if network interruption occurs.} } \value{ Output will be an object of class \code{list} indexed by state. diff --git a/man/predict_race.Rd b/man/predict_race.Rd index f666158..0428f58 100644 --- a/man/predict_race.Rd +++ b/man/predict_race.Rd @@ -6,7 +6,7 @@ \usage{ predict_race(voter.file, census.surname = TRUE, surname.only = FALSE, surname.year = 2010, census.geo, census.key, census.data = NA, - age = FALSE, sex = FALSE, party) + age = FALSE, sex = FALSE, party, retry = 0) } \arguments{ \item{voter.file}{An object of class \code{data.frame}. @@ -80,6 +80,8 @@ If specified, race/ethnicity predictions will be conditioned on individual's party registration (in addition to geolocation). Whatever the name of the party registration field in \code{\var{voter.file}}, it should be coded as 1 for Democrat, 2 for Republican, and 0 for Other.} + +\item{retry}{The number of retries at the census website if network interruption occurs.} } \value{ Output will be an object of class \code{data.frame}. It will