Skip to content

Commit

Permalink
Merge pull request #25 from HJ08003/master
Browse files Browse the repository at this point in the history
Add the retry parameter so that the census access will retry certain times
  • Loading branch information
kkprinceton authored Apr 10, 2017
2 parents 48a0f63 + 50504a3 commit 5aa2de8
Show file tree
Hide file tree
Showing 12 changed files with 56 additions and 44 deletions.
19 changes: 8 additions & 11 deletions R/census_geo_api.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#' sex or not. If \code{FALSE} (default), function will return Pr(Geolocation | Race).
#' If \code{TRUE}, function will return Pr(Geolocation, Sex | Race).
#' If \code{\var{age}} is also \code{TRUE}, function will return Pr(Geolocation, Age, Sex | Race).
#' @param retry The number of retries at the census website if network interruption occurs.
#' @return Output will be an object of class \code{list}, indexed by state names. It will
#' consist of the original user-input data with additional columns of Census geographic data.
#'
Expand All @@ -33,7 +34,7 @@
#' available \href{http://rstudio-pubs-static.s3.amazonaws.com/19337_2e7f827190514c569ea136db788ce850.html}{here}.
#'
#' @export
census_geo_api <- function(key, state, geo = "tract", age = FALSE, sex = FALSE) {
census_geo_api <- function(key, state, geo = "tract", age = FALSE, sex = FALSE, retry = 0) {

if (missing(key)) {
stop('Must enter U.S. Census API key, which can be requested at http://api.census.gov/data/key_signup.html.')
Expand Down Expand Up @@ -81,24 +82,22 @@ census_geo_api <- function(key, state, geo = "tract", age = FALSE, sex = FALSE)
if (geo == "county") {
geo.merge <- c("state", "county")
region <- paste("for=county:*&in=state:", state.fips, sep = "")
census <- get_census_api("http://api.census.gov/data/2010/sf1?",
key = key, vars = vars, region = region)
census <- get_census_api("http://api.census.gov/data/2010/sf1?", key = key, vars = vars, region = region, retry)
}

if (geo == "tract") {

geo.merge <- c("state", "county", "tract")

region_county <- paste("for=county:*&in=state:", state.fips, sep = "")
county_df <- get_census_api("http://api.census.gov/data/2010/sf1?", key = key, vars = vars, region = region_county)
county_df <- get_census_api("http://api.census.gov/data/2010/sf1?", key = key, vars = vars, region = region_county, retry)
county_list <- county_df$county

census <- NULL
for (c in 1:length(county_list)) {
print(paste("County ", c, " of ", length(county_list), ": ", county_list[c], sep = ""))
region_county <- paste("for=tract:*&in=state:", state.fips, "+county:", county_list[c], sep = "")
census.temp <- get_census_api("http://api.census.gov/data/2010/sf1?",
key = key, vars = vars, region = region_county)
census.temp <- get_census_api("http://api.census.gov/data/2010/sf1?", key = key, vars = vars, region = region_county, retry)
census <- rbind(census, census.temp)
}
rm(census.temp)
Expand All @@ -109,7 +108,7 @@ census_geo_api <- function(key, state, geo = "tract", age = FALSE, sex = FALSE)
geo.merge <- c("state", "county", "tract", "block")

region_county <- paste("for=county:*&in=state:", state.fips, sep = "")
county_df <- get_census_api("http://api.census.gov/data/2010/sf1?", key = key, vars = vars, region = region_county)
county_df <- get_census_api("http://api.census.gov/data/2010/sf1?", key = key, vars = vars, region = region_county, retry)
county_list <- county_df$county

census <- NULL
Expand All @@ -119,16 +118,14 @@ census_geo_api <- function(key, state, geo = "tract", age = FALSE, sex = FALSE)

region_tract <- paste("for=tract:*&in=state:", state.fips, "+county:", county_list[c], sep = "")
print(region_tract)
tract_df <- get_census_api("http://api.census.gov/data/2010/sf1?",
key = key, vars = vars, region = region_tract)
tract_df <- get_census_api("http://api.census.gov/data/2010/sf1?", key = key, vars = vars, region = region_tract, retry)
tract_list <- tract_df$tract

for (t in 1:length(tract_list)) {
print(paste("Tract ", t, " of ", length(tract_list), ": ", tract_list[t], sep = ""))

region_block <- paste("for=block:*&in=state:", state.fips, "+county:", county_list[c], "+tract:", tract_list[t], sep = "")
census.temp <- get_census_api("http://api.census.gov/data/2010/sf1?",
key = key, vars = vars, region = region_block)
census.temp <- get_census_api("http://api.census.gov/data/2010/sf1?", key = key, vars = vars, region = region_block, retry)
census <- rbind(census, census.temp)
}
}
Expand Down
9 changes: 5 additions & 4 deletions R/census_helper.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#' \code{FALSE} in both). Similarly, the \code{\var{sex}} element in the object provided in
#' \code{\var{census.data}} must have the same value as the \code{\var{sex}} option here.
#' If \code{\var{census.data}} is missing, Census geographic data will be obtained via Census API.
#' @param retry The number of retries at the census website if network interruption occurs.
#' @return Output will be an object of class \code{data.frame}. It will
#' consist of the original user-input data with additional columns of
#' Census data.
Expand All @@ -45,7 +46,7 @@
#' \dontrun{census_helper(key = "...", voter.file = voters, states = "all", geo = "tract", age = TRUE, sex = TRUE)}
#'
#' @export
census_helper <- function(key, voter.file, states = "all", geo = "tract", age = FALSE, sex = FALSE, census.data = NA) {
census_helper <- function(key, voter.file, states = "all", geo = "tract", age = FALSE, sex = FALSE, census.data = NA, retry = 0) {

if (is.na(census.data) || (typeof(census.data) != "list")) {
toDownload = TRUE
Expand Down Expand Up @@ -74,7 +75,7 @@ census_helper <- function(key, voter.file, states = "all", geo = "tract", age =
if (geo == "county") {
geo.merge <- c("state", "county")
if ((toDownload) || (is.null(census.data[[state]])) || (census.data[[state]]$age != age) || (census.data[[state]]$sex != sex)) {
census <- census_geo_api(key, state, geo = "county", age, sex)
census <- census_geo_api(key, state, geo = "county", age, sex, retry)
} else {
census <- census.data[[state]]$county
}
Expand All @@ -83,7 +84,7 @@ census_helper <- function(key, voter.file, states = "all", geo = "tract", age =
if (geo == "tract") {
geo.merge <- c("state", "county", "tract")
if ((toDownload) || (is.null(census.data[[state]])) || (census.data[[state]]$age != age) || (census.data[[state]]$sex != sex)) {
census <- census_geo_api(key, state, geo = "tract", age, sex)
census <- census_geo_api(key, state, geo = "tract", age, sex, retry)
} else {
census <- census.data[[state]]$tract
}
Expand All @@ -92,7 +93,7 @@ census_helper <- function(key, voter.file, states = "all", geo = "tract", age =
if (geo == "block") {
geo.merge <- c("state", "county", "tract", "block")
if ((toDownload) || (is.null(census.data[[state]])) || (census.data[[state]]$age != age) || (census.data[[state]]$sex != sex)) {
census <- census_geo_api(key, state, geo = "block", age, sex)
census <- census_geo_api(key, state, geo = "block", age, sex, retry)
} else {
census <- census.data[[state]]$block
}
Expand Down
7 changes: 4 additions & 3 deletions R/get_census_api.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#' @param region Character object specifying which region to obtain data for.
#' Must contain "for" and possibly "in",
#' e.g., \code{"for=block:1213&in=state:47+county:015+tract:*"}.
#' @param retry The number of retries at the census website if network interruption occurs.
#' @return If successful, output will be an object of class \code{data.frame}.
#' If unsuccessful, function prints the URL query that caused the error.
#'
Expand All @@ -28,15 +29,15 @@
#' \href{http://rstudio-pubs-static.s3.amazonaws.com/19337_2e7f827190514c569ea136db788ce850.html}{here}.
#'
#' @export
get_census_api <- function(data_url, key, vars, region) {
get_census_api <- function(data_url, key, vars, region, retry = 0) {
if(length(vars) > 50){
vars <- vec_to_chunk(vars) # Split variables into a list
get <- lapply(vars, function(x) paste(x, sep='', collapse=","))
data <- lapply(vars, function(x) get_census_api_2(data_url,key, x, region))
data <- lapply(vars, function(x) get_census_api_2(data_url,key, x, region, retry))
}
else {
get <- paste(vars, sep='', collapse=',')
data <- list(get_census_api_2(data_url, key, get, region))
data <- list(get_census_api_2(data_url, key, get, region, retry))
}

## Format output. If there were no errors, than paste the data together. If there is an error, just return the unformatted list.
Expand Down
22 changes: 10 additions & 12 deletions R/get_census_api_2.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#' @param region Character object specifying which region to obtain data for.
#' Must contain "for" and possibly "in",
#' e.g., \code{"for=block:1213&in=state:47+county:015+tract:*"}.
#' @param retry The number of retries at the census website if network interruption occurs.
#' @return If successful, output will be an object of class \code{data.frame}.
#' If unsuccessful, function prints the URL query that was constructed.
#'
Expand All @@ -29,23 +30,20 @@
#' \href{http://rstudio-pubs-static.s3.amazonaws.com/19337_2e7f827190514c569ea136db788ce850.html}{here}.
#'
#' @export
get_census_api_2 <- function(data_url, key, get, region){
get_census_api_2 <- function(data_url, key, get, region, retry = 0){
if(length(get)>1) {
get <- paste(get, collapse=',', sep='')
}
api_call <- paste(data_url, 'key=', key, '&get=', get, '&', region, sep='')
dat_raw <- try(readLines(api_call, warn="F"))
#
# The following will pause and retry the census website access. But we are not using it right now.
#
# retry = 5
# while ((class(dat_raw) == 'try-error') && (retry > 0)) {
# print(paste("Try census server again:", data_url))
# Sys.sleep(1)
# retry <- retry - 1
# dat_raw <- try(readLines(api_call, warn="F"))
# }
#

while ((class(dat_raw) == 'try-error') && (retry > 0)) {
print(paste("Try census server again:", data_url))
Sys.sleep(1)
retry <- retry - 1
dat_raw <- try(readLines(api_call, warn="F"))
}

if(class(dat_raw) == 'try-error') {
print("Data access failure at the census website, please try again by re-run the previous command")
stop(print(api_call))
Expand Down
9 changes: 5 additions & 4 deletions R/get_census_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,15 @@
#' @param census.geo An optional character vector specifying what level of
#' geography to use to merge in U.S. Census 2010 geographic data. Currently
#' \code{"county"}, \code{"tract"}, or \code{"block"} are supported.
#' @param retry The number of retries at the census website if network interruption occurs.
#' @return Output will be an object of class \code{list} indexed by state.
#' Output will contain the following elements: \code{state}, \code{age}, \code{sex},
#' \code{county}, \code{tract} and \code{block}.
#'
#' @export
#'
#' @examples \dontrun{get_census_data(key = "...", states = c("NJ", "NY"), age = TRUE, sex = FALSE)}
get_census_data <- function(key, states, age = FALSE, sex = FALSE, census.geo = "block") {
get_census_data <- function(key, states, age = FALSE, sex = FALSE, census.geo = "block", retry = 0) {

if (missing(key)) {
stop('Must enter valid Census API key, which can be requested at http://api.census.gov/data/key_signup.html.')
Expand All @@ -37,15 +38,15 @@ get_census_data <- function(key, states, age = FALSE, sex = FALSE, census.geo =
for (s in states) {
CensusObj[[s]] <- list(state = s, age = age, sex = sex)
if (census.geo == "block") {
block <- census_geo_api(key, s, geo = "block", age, sex)
block <- census_geo_api(key, s, geo = "block", age, sex, retry)
CensusObj[[s]]$block <- block
}
if ((census.geo == "block") || (census.geo == "tract")) {
tract <- census_geo_api(key, s, geo = "tract", age, sex)
tract <- census_geo_api(key, s, geo = "tract", age, sex, retry)
CensusObj[[s]]$tract <- tract
}
if ((census.geo == "block") || (census.geo == "tract") || (census.geo == "county")) {
county <- census_geo_api(key, s, geo = "county", age, sex)
county <- census_geo_api(key, s, geo = "county", age, sex, retry)
CensusObj[[s]]$county <- county
}
}
Expand Down
9 changes: 5 additions & 4 deletions R/predict_race.R
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
#' on individual's party registration (in addition to geolocation).
#' Whatever the name of the party registration field in \code{\var{voter.file}},
#' it should be coded as 1 for Democrat, 2 for Republican, and 0 for Other.
#' @param retry The number of retries at the census website if network interruption occurs.
#' @return Output will be an object of class \code{data.frame}. It will
#' consist of the original user-input data with additional columns with
#' predicted probabilities for each of the five major racial categories:
Expand All @@ -91,7 +92,7 @@
## Race Prediction Function
predict_race <- function(voter.file,
census.surname = TRUE, surname.only = FALSE, surname.year = 2010,
census.geo, census.key, census.data = NA, age = FALSE, sex = FALSE, party) {
census.geo, census.key, census.data = NA, age = FALSE, sex = FALSE, party, retry = 0) {

if (!missing(census.geo) && (census.geo == "precinct")) {
# geo <- "precinct"
Expand Down Expand Up @@ -181,7 +182,7 @@ predict_race <- function(voter.file,
geo = "block",
age = age,
sex = sex,
census.data = census.data)
census.data = census.data, retry = retry)
}

if (census.geo == "precinct") {
Expand All @@ -199,7 +200,7 @@ predict_race <- function(voter.file,
geo = "tract",
age = age,
sex = sex,
census.data = census.data)
census.data = census.data, retry = retry)
}

if (census.geo == "county") {
Expand All @@ -212,7 +213,7 @@ predict_race <- function(voter.file,
geo = "county",
age = age,
sex = sex,
census.data = census.data)
census.data = census.data, retry = retry)
}

## Pr(Race | Surname, Geolocation)
Expand Down
5 changes: 4 additions & 1 deletion man/census_geo_api.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion man/census_helper.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion man/get_census_api.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion man/get_census_api_2.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion man/get_census_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion man/predict_race.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 5aa2de8

Please sign in to comment.