-
Notifications
You must be signed in to change notification settings - Fork 31
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #18 from kosukeimai/update-surname-handling
Merging development branch
- Loading branch information
Showing
48 changed files
with
1,623 additions
and
1,644 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,5 @@ | ||
Date Version Comment | ||
2015-12-09 0.0-1 First version on CRAN | ||
2016-03-04 0.0-2 Minor improvements | ||
2016-12-13 0.1-1 New function to pre-download Census data and other minor improvements | ||
2017-03-03 0.1-2 Updated surname handling, enhanced demographics option, and improved error handling and documentation |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,9 @@ | ||
Package: wru | ||
Version: 0.1-1 | ||
Date: 2016-12-12 | ||
Version: 0.1-2 | ||
Date: 2017-3-3 | ||
Title: Who Are You? Bayesian Prediction of Racial Category Using Surname and | ||
Geolocation | ||
Author: Kabir Khanna [aut, cre], Kosuke Imai [aut, cre] | ||
Author: Kabir Khanna [aut, cre], Kosuke Imai [aut, cre], Hubert Jin [ctb] | ||
Maintainer: Kabir Khanna <[email protected]> | ||
Description: This open-source software package enables researchers to predict | ||
individual ethnicity using his/her surname, geolocation, and other attributes | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,13 +1,11 @@ | ||
# Generated by roxygen2: do not edit by hand | ||
|
||
export(census.helper.api) | ||
export(census.helper.api.local) | ||
export(census.helper.api.online) | ||
export(censusData) | ||
export(getCensusApi) | ||
export(getCensusApi2) | ||
export(getCensusData) | ||
export(name.clean) | ||
export(race.pred) | ||
export(vecToChunk) | ||
export(census_geo_api) | ||
export(census_helper) | ||
export(get_census_api) | ||
export(get_census_api_2) | ||
export(get_census_data) | ||
export(merge_surnames) | ||
export(predict_race) | ||
export(vec_to_chunk) | ||
import(devtools) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
#' State's FIPS. | ||
#' | ||
#' List of States and their FIPS. | ||
#' | ||
#' @format A data frame with 55 rows and 2 variables: | ||
#' \describe{ | ||
#' \item{State}{State} | ||
#' \item{FIPS}{FIPS} | ||
#' #' } | ||
#' | ||
#' @docType data | ||
#' @keywords datasets | ||
#' @name State.FIPS | ||
#' @examples | ||
#' data(State.FIPS) | ||
"State.FIPS" |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,234 @@ | ||
#' Census Data download function. | ||
#' | ||
#' \code{census_geo_api} retrieves U.S. Census geographic data for a given state. | ||
#' | ||
#' This function allows users to download U.S. Census 2010 geographic data, | ||
#' at either the county, tract, or block level, for a particular state. | ||
#' | ||
#' @param key A required character object. Must contain user's Census API | ||
#' key, which can be requested \href{http://api.census.gov/data/key_signup.html}{here}. | ||
#' @param state A required character object specifying which state to extract Census data for, | ||
#' e.g., \code{"NJ"}. | ||
#' @param geo A character object specifying what aggregation level to use. | ||
#' Use \code{"county"}, \code{"tract"}, or \code{"block"}. Default is \code{"tract"}. | ||
#' Warning: extracting block-level data takes very long. | ||
#' @param age A \code{TRUE}/\code{FALSE} object indicating whether to condition on | ||
#' age or not. If \code{FALSE} (default), function will return Pr(Geolocation | Race). | ||
#' If \code{TRUE}, function will return Pr(Geolocation, Age | Race). | ||
#' If \code{\var{sex}} is also \code{TRUE}, function will return Pr(Geolocation, Age, Sex | Race). | ||
#' @param sex A \code{TRUE}/\code{FALSE} object indicating whether to condition on | ||
#' sex or not. If \code{FALSE} (default), function will return Pr(Geolocation | Race). | ||
#' If \code{TRUE}, function will return Pr(Geolocation, Sex | Race). | ||
#' If \code{\var{age}} is also \code{TRUE}, function will return Pr(Geolocation, Age, Sex | Race). | ||
#' @return Output will be an object of class \code{list}, indexed by state names. It will | ||
#' consist of the original user-input data with additional columns of Census geographic data. | ||
#' | ||
#' @examples | ||
#' \dontshow{data(voters)} | ||
#' \dontrun{census_geo_api(key = "...", states = c("NJ", "DE"), geo = "block")} | ||
#' \dontrun{census_geo_api(key = "...", states = "FL", geo = "tract", age = TRUE, sex = TRUE)} | ||
#' | ||
#' @references | ||
#' Relies on get_census_api, get_census_api_2, and vec_to_chunk functions authored by Nicholas Nagle, | ||
#' available \href{http://rstudio-pubs-static.s3.amazonaws.com/19337_2e7f827190514c569ea136db788ce850.html}{here}. | ||
#' | ||
#' @export | ||
census_geo_api <- function(key, state, geo = "tract", age = FALSE, sex = FALSE) { | ||
|
||
if (missing(key)) { | ||
stop('Must enter U.S. Census API key, which can be requested at http://api.census.gov/data/key_signup.html.') | ||
} | ||
|
||
state <- toupper(state) | ||
|
||
df.out <- NULL | ||
|
||
fips.codes <- get("State.FIPS") | ||
state.fips <- fips.codes[fips.codes$State == state, "FIPS"] | ||
|
||
if (age == F & sex == F) { | ||
num <- ifelse(3:10 != 10, paste("0", 3:10, sep = ""), "10") | ||
vars <- paste("P00500", num, sep = "") | ||
} | ||
|
||
if (age == F & sex == T) { | ||
eth.let <- c("I", "B", "H", "D", "E", "F", "C") | ||
num <- as.character(c("01", "02", "26")) | ||
vars <- NULL | ||
for (e in 1:length(eth.let)) { | ||
vars <- c(vars, paste("P012", eth.let[e], "0", num, sep = "")) | ||
} | ||
} | ||
|
||
if (age == T & sex == F) { | ||
eth.let <- c("I", "B", "H", "D", "E", "F", "C") | ||
num <- as.character(c(c("01", "03", "04", "05", "06", "07", "08", "09"), seq(10, 25), seq(27, 49))) | ||
vars <- NULL | ||
for (e in 1:length(eth.let)) { | ||
vars <- c(vars, paste("P012", eth.let[e], "0", num, sep = "")) | ||
} | ||
} | ||
|
||
if (age == T & sex == T) { | ||
eth.let <- c("I", "B", "H", "D", "E", "F", "C") | ||
num <- as.character(c(c("01", "03", "04", "05", "06", "07", "08", "09"), seq(10, 25), seq(27, 49))) | ||
vars <- NULL | ||
for (e in 1:length(eth.let)) { | ||
vars <- c(vars, paste("P012", eth.let[e], "0", num, sep = "")) | ||
} | ||
} | ||
|
||
if (geo == "county") { | ||
geo.merge <- c("state", "county") | ||
region <- paste("for=county:*&in=state:", state.fips, sep = "") | ||
census <- get_census_api("http://api.census.gov/data/2010/sf1?", | ||
key = key, vars = vars, region = region) | ||
} | ||
|
||
if (geo == "tract") { | ||
|
||
geo.merge <- c("state", "county", "tract") | ||
|
||
region_county <- paste("for=county:*&in=state:", state.fips, sep = "") | ||
county_df <- get_census_api("http://api.census.gov/data/2010/sf1?", key = key, vars = vars, region = region_county) | ||
county_list <- county_df$county | ||
|
||
census <- NULL | ||
for (c in 1:length(county_list)) { | ||
print(paste("County ", c, " of ", length(county_list), ": ", county_list[c], sep = "")) | ||
region_county <- paste("for=tract:*&in=state:", state.fips, "+county:", county_list[c], sep = "") | ||
census.temp <- get_census_api("http://api.census.gov/data/2010/sf1?", | ||
key = key, vars = vars, region = region_county) | ||
census <- rbind(census, census.temp) | ||
} | ||
rm(census.temp) | ||
} | ||
|
||
if (geo == "block") { | ||
|
||
geo.merge <- c("state", "county", "tract", "block") | ||
|
||
region_county <- paste("for=county:*&in=state:", state.fips, sep = "") | ||
county_df <- get_census_api("http://api.census.gov/data/2010/sf1?", key = key, vars = vars, region = region_county) | ||
county_list <- county_df$county | ||
|
||
census <- NULL | ||
|
||
for (c in 1:length(county_list)) { | ||
print(paste("County ", c, " of ", length(county_list), ": ", county_list[c], sep = "")) | ||
|
||
region_tract <- paste("for=tract:*&in=state:", state.fips, "+county:", county_list[c], sep = "") | ||
print(region_tract) | ||
tract_df <- get_census_api("http://api.census.gov/data/2010/sf1?", | ||
key = key, vars = vars, region = region_tract) | ||
tract_list <- tract_df$tract | ||
|
||
for (t in 1:length(tract_list)) { | ||
print(paste("Tract ", t, " of ", length(tract_list), ": ", tract_list[t], sep = "")) | ||
|
||
region_block <- paste("for=block:*&in=state:", state.fips, "+county:", county_list[c], "+tract:", tract_list[t], sep = "") | ||
census.temp <- get_census_api("http://api.census.gov/data/2010/sf1?", | ||
key = key, vars = vars, region = region_block) | ||
census <- rbind(census, census.temp) | ||
} | ||
} | ||
|
||
rm(census.temp) | ||
|
||
} | ||
|
||
census$state <- state | ||
|
||
if (age == F & sex == F) { | ||
|
||
## Calculate Pr(Geolocation | Race) | ||
census$r_whi <- census$P0050003 / sum(census$P0050003) #Pr(Tract|White) | ||
census$r_bla <- census$P0050004 / sum(census$P0050004) #Pr(Tract|Black) | ||
census$r_his <- census$P0050010 / sum(census$P0050010) #Pr(Tract|Latino) | ||
census$r_asi <- (census$P0050006 + census$P0050007) / (sum(census$P0050006) + sum(census$P0050007)) #Pr(Tract | Asian or NH/PI) | ||
census$r_oth <- (census$P0050005 + census$P0050008 + census$P0050009) / (sum(census$P0050005) + sum(census$P0050008) + sum(census$P0050009)) #Pr(Tract | AI/AN, Other, or Mixed) | ||
|
||
} | ||
|
||
if (age == F & sex == T) { | ||
|
||
## Calculate Pr(Geolocation, Sex | Race) | ||
eth.cen <- c("whi", "bla", "his", "asi", "oth") | ||
eth.let <- c("I", "B", "H", "D", "F") | ||
|
||
for (i in 1:length(eth.cen)) { | ||
if (i != 4 & i != 5) { | ||
census[paste("r_mal", eth.cen[i], sep = "_")] <- census[paste("P012", eth.let[i], "002", sep = "")] / sum(census[paste("P012", eth.let[i], "001", sep = "")]) | ||
census[paste("r_fem", eth.cen[i], sep = "_")] <- census[paste("P012", eth.let[i], "026", sep = "")] / sum(census[paste("P012", eth.let[i], "001", sep = "")]) | ||
} | ||
if (i == 4) { | ||
## Combine Asian and Native Hawaiian/Pacific Islander | ||
census[paste("r_mal", eth.cen[i], sep = "_")] <- (census$P012D002 + census$P012E002) / sum(census$P012D001 + census$P012E001) | ||
census[paste("r_fem", eth.cen[i], sep = "_")] <- (census$P012D026 + census$P012E026) / sum(census$P012D001 + census$P012E001) | ||
} | ||
if (i == 5) { | ||
## Combine American India/Alaska Native and Other | ||
census[paste("r_mal", eth.cen[i], sep = "_")] <- (census$P012C002 + census$P012F002) / sum(census$P012C001 + census$P012F001) | ||
census[paste("r_fem", eth.cen[i], sep = "_")] <- (census$P012C026 + census$P012F026) / sum(census$P012C001 + census$P012F001) | ||
} | ||
} | ||
} | ||
|
||
if (age == T & sex == F) { | ||
|
||
## Calculate Pr(Geolocation, Age Category | Race) | ||
eth.cen <- c("whi", "bla", "his", "asi", "oth") | ||
eth.let <- c("I", "B", "H", "D", "F") | ||
age.cat <- c(seq(1, 23), seq(1, 23)) | ||
age.cen <- as.character(c(c("03", "04", "05", "06", "07", "08", "09"), seq(10, 25), seq(27, 49))) | ||
|
||
for (i in 1:length(eth.cen)) { | ||
for (j in 1:23) { | ||
if (i != 4 & i != 5) { | ||
census[paste("r", age.cat[j], eth.cen[i], sep = "_")] <- (census[paste("P012", eth.let[i], "0", age.cen[j], sep = "")] + census[paste("P012", eth.let[i], "0", age.cen[j + 23], sep = "")]) / sum(census[paste("P012", eth.let[i], "001", sep = "")]) | ||
} | ||
if (i == 4) { | ||
## Combine Asian and Native Hawaiian/Pacific Islander | ||
census[paste("r", age.cat[j], eth.cen[i], sep = "_")] <- (census[paste("P012D0", age.cen[j], sep = "")] + census[paste("P012D0", age.cen[j + 23], sep = "")] + census[paste("P012E0", age.cen[j], sep = "")] + census[paste("P012E0", age.cen[j + 23], sep = "")]) / sum(census$P012D001 + census$P012E001) | ||
} | ||
if (i == 5) { | ||
## Combine American India/Alaska Native and Other | ||
census[paste("r", age.cat[j], eth.cen[i], sep = "_")] <- (census[paste("P012C0", age.cen[j], sep = "")] + census[paste("P012C0", age.cen[j + 23], sep = "")] + census[paste("P012F0", age.cen[j], sep = "")] + census[paste("P012F0", age.cen[j + 23], sep = "")]) / sum(census$P012C001 + census$P012F001) | ||
} | ||
} | ||
} | ||
} | ||
|
||
if (age == T & sex == T) { | ||
|
||
## Calculate Pr(Geolocation, Sex, Age Category | Race) | ||
eth.cen <- c("whi", "bla", "his", "asi", "oth") | ||
eth.let <- c("I", "B", "H", "D", "F") | ||
sex.let <- c("mal", "fem") | ||
age.cat <- c(seq(1, 23), seq(1, 23)) | ||
age.cen <- as.character(c(c("03", "04", "05", "06", "07", "08", "09"), seq(10, 25), seq(27, 49))) | ||
|
||
for (i in 1:length(eth.cen)) { | ||
for (k in 1:length(sex.let)) { | ||
for (j in 1:23) { | ||
if (k == 2) { | ||
j <- j + 23 | ||
} | ||
if (i != 4 & i != 5) { | ||
census[paste("r", sex.let[k], age.cat[j], eth.cen[i], sep = "_")] <- census[paste("P012", eth.let[i], "0", age.cen[j], sep = "")] / sum(census[paste("P012", eth.let[i], "001", sep = "")]) | ||
} | ||
if (i == 4) { | ||
## Combine Asian and Native Hawaiian/Pacific Islander | ||
census[paste("r", sex.let[k], age.cat[j], eth.cen[i], sep = "_")] <- (census[paste("P012D0", age.cen[j], sep = "")] + census[paste("P012E0", age.cen[j], sep = "")]) / sum(census$P012D001 + census$P012E001) | ||
} | ||
if (i == 5) { | ||
## Combine American India/Alaska Native and Other | ||
census[paste("r", sex.let[k], age.cat[j], eth.cen[i], sep = "_")] <- (census[paste("P012C0", age.cen[j], sep = "")] + census[paste("P012F0", age.cen[j], sep = "")]) / sum(census$P012C001 + census$P012F001) | ||
} | ||
} | ||
} | ||
} | ||
} | ||
|
||
return(census) | ||
} |
Oops, something went wrong.