Merge pull request #43 from christopherkenny/master

Some tweaks to pass R CMD Check
kosukeimai · Feb 22, 2022 · 5153437 · 5153437
2 parents 8af0a6a + 09f3ba5
commit 5153437
Show file tree

Hide file tree

Showing 41 changed files with 298 additions and 110 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -2,3 +2,4 @@
 ^\.Rproj\.user$
 ^\.travis\.yml$
 ^wru\.Rproj$
+^\.github$
diff --git a/.github/.gitignore b/.github/.gitignore
@@ -0,0 +1 @@
+*.html
diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
@@ -0,0 +1,58 @@
+# Workflow derived from https://github.com/r-lib/actions/tree/master/examples
+# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
+on:
+  push:
+    branches: [main, master]
+  pull_request:
+    branches: [main, master]
+
+name: R-CMD-check
+
+jobs:
+  R-CMD-check:
+    runs-on: ${{ matrix.config.os }}
+
+    name: ${{ matrix.config.os }} (${{ matrix.config.r }})
+
+    strategy:
+      fail-fast: false
+      matrix:
+        config:
+          - {os: macOS-latest,   r: 'release'}
+          - {os: windows-latest, r: 'release'}
+          - {os: ubuntu-latest,   r: 'devel', http-user-agent: 'release'}
+          - {os: ubuntu-latest,   r: 'release'}
+          - {os: ubuntu-latest,   r: 'oldrel-1'}
+
+    env:
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+      R_KEEP_PKG_SOURCE: yes
+
+    steps:
+      - uses: actions/checkout@v2
+
+      - uses: r-lib/actions/setup-pandoc@v1
+
+      - uses: r-lib/actions/setup-r@v1
+        with:
+          r-version: ${{ matrix.config.r }}
+          http-user-agent: ${{ matrix.config.http-user-agent }}
+          use-public-rspm: true
+
+      - uses: r-lib/actions/setup-r-dependencies@v1
+        with:
+          extra-packages: rcmdcheck
+
+      - uses: r-lib/actions/check-r-package@v1
+
+      - name: Show testthat output
+        if: always()
+        run: find check -name 'testthat.Rout*' -exec cat '{}' \; || true
+        shell: bash
+
+      - name: Upload check results
+        if: failure()
+        uses: actions/upload-artifact@main
+        with:
+          name: ${{ runner.os }}-r${{ matrix.config.r }}-results
+          path: check
diff --git a/.gitignore b/.gitignore
@@ -19,3 +19,8 @@ vignettes/*.pdf
 
 # other files
 .DS_Store
+
+# Rcpp files 
+*.o
+*.so
+*.dll
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -24,7 +24,8 @@ Imports:
     devtools (>= 1.10.0),
     PL94171,
     stringr,
-    Rcpp
+    Rcpp,
+    dplyr (>= 1.0.0)
 LinkingTo: 
     Rcpp,
     RcppEigen,
@@ -35,4 +36,5 @@ LazyLoad: yes
 LazyData: yes
 LazyDataCompression: xz
 License: GPL (>= 3)
-RoxygenNote: 7.1.1.9001
+RoxygenNote: 7.1.2
+Encoding: UTF-8
diff --git a/NAMESPACE b/NAMESPACE
@@ -17,4 +17,6 @@ import(PL94171)
 import(devtools)
 import(stringr)
 importFrom(Rcpp,evalCpp)
+importFrom(dplyr,.data)
+importFrom(dplyr,`%>%`)
 useDynLib(wru, .registration=TRUE)
diff --git a/R/R.Rproj b/R/R.Rproj
diff --git a/R/co_cluster.R b/R/co_cluster.R
@@ -16,7 +16,7 @@
 #' @param name_race_tables Named list, with as many elements as there are names in \code{name_types}, and names matching
 #'                         elements in \code{name_types}. Each list element should be a data.frame of unique names (first column) by race (remaining columns),
 #'                         with conditional probabilities p(Name|Race). 
-#' @param census_geo Required character vector. One of "county", "tract", "block" or "place". See \code{\link{predict_race}}.                        
+#' @param census.geo Required character vector. One of "county", "tract", "block" or "place". See \code{\link{predict_race}}.                        
 #' @param ... Arguments passed to \code{\link{predict_race}}.
 #' @param control List of control arguments, including 
 #' \itemize{

diff --git a/R/data.R b/R/data.R
@@ -0,0 +1,59 @@
+#' Dictionary of First Names
+#'
+#' A dataset of first names with associated race probabilities.
+#'
+#' @format A data frame with 1,043,742 rows and 6 columns:
+#' \describe{
+#'   \item{first_name}{First name}
+#'   \item{p_whi_first}{Probability of white}
+#'   \item{p_bla_first}{Probability of black}
+#'   \item{p_his_first}{Probability of Hispanic}
+#'   \item{p_asi_first}{Probability of Asian}
+#'   \item{p_oth_first}{Probability of other}
+#'   }
+#'
+#' @keywords datasets
+#' @name firstNameDict
+#' @examples
+#' data(firstNameDict)
+NULL
+
+#' Dictionary of Last Names
+#'
+#' A dataset of last names with associated race probabilities.
+#'
+#' @format A data frame with 1,502,541 rows and 6 columns:
+#' \describe{
+#'   \item{last_name}{Last name}
+#'   \item{p_whi_last}{Probability of white}
+#'   \item{p_bla_last}{Probability of black}
+#'   \item{p_his_last}{Probability of Hispanic}
+#'   \item{p_asi_last}{Probability of Asian}
+#'   \item{p_oth_last}{Probability of other}
+#'   }
+#'
+#' @keywords datasets
+#' @name lastNameDict
+#' @examples
+#' data(lastNameDict)
+NULL
+
+#' Dictionary of Middle Names
+#'
+#' A dataset of middle names with associated race probabilities.
+#'
+#' @format A data frame with 1,182,133 rows and 6 columns:
+#' \describe{
+#'   \item{middle_name}{Middle name}
+#'   \item{p_whi_middle}{Probability of white}
+#'   \item{p_bla_middle}{Probability of black}
+#'   \item{p_his_middle}{Probability of Hispanic}
+#'   \item{p_asi_middle}{Probability of Asian}
+#'   \item{p_oth_middle}{Probability of other}
+#'   }
+#'
+#' @keywords datasets
+#' @name middleNameDict
+#' @examples
+#' data(middleNameDict)
+NULL
diff --git a/R/format_legacy_data.R b/R/format_legacy_data.R
@@ -16,12 +16,14 @@
 #' filepath should end in ".RData". 
 #'
 #' @import PL94171
+#' @importFrom dplyr `%>%` .data
 #' 
 #' @examples
-#' gaCensusData <- format_legacy_data('~/Desktop/ga2020.pl')
+#' \dontrun{
+#' gaCensusData <- format_legacy_data(PL94171::pl_url('GA', 2020))
 #' predict_race_new(ga.voter.file, namesToUse = 'last, first, mid', census.geo = 'block',
 #'      census.data = gaCensusData)
-
+#' }
 #'
 #' @export
 format_legacy_data <- function(legacyFilePath, outFile = NULL) {
@@ -30,32 +32,36 @@ format_legacy_data <- function(legacyFilePath, outFile = NULL) {
   summaryLevels <- c('050', '140', '150', '750')
 
   # read in the data
-  pl <- pl_read(legacyFilePath)
-  pl <- pl_select_standard(pl) 
+  pl <- PL94171::pl_read(legacyFilePath)
+  pl <- PL94171::pl_select_standard(pl) 
 
   # iterate through the levels 
   censusData.2020 <- lapply(summaryLevels, FUN = function(level) {
-    levelData <- pl[pl$summary_level == level,]
+    levelData <- PL94171::pl_subset(pl, level)
 
     # construct the base data frame
-    df <- data.frame(state = toupper(state), 
-                     county = levelData$county,
-                     P005003 = levelData$pop_white, 
-                     P005004 = levelData$pop_black, 
-                     P005010 = levelData$pop_hisp,
-                     P005006 = levelData$pop_asian,
-                     P005007 = levelData$pop_nhpi,
-                     P005005 = levelData$pop_aian, 
-                     P005008 = levelData$pop_other, 
-                     P005009 = levelData$pop_two)
+    df <- levelData %>% 
+      dplyr::select(GEOID = .data$GEOID, 
+                    state = toupper(.data$state), 
+                    county = .data$county,
+                    P005003 = .data$pop_white, 
+                    P005004 = .data$pop_black, 
+                    P005010 = .data$pop_hisp,
+                    P005006 = .data$pop_asian,
+                    P005007 = .data$pop_nhpi,
+                    P005005 = .data$pop_aian, 
+                    P005008 = .data$pop_other, 
+                    P005009 = .data$pop_two
+      )
 
     # add geographic levels
     if(level != '050') {
-      df$tract <- substr(levelData$GEOID, nchar(levelData$GEOID) - 5, nchar(levelData$GEOID))
+      df <- df %>% dplyr::mutate(tract = substr(.data$GEOID, nchar(.data$GEOID) - 5, nchar(.data$GEOID)))
       if(level != '140') {
-        df$blockGroup <- substr(levelData$GEOID, nchar(levelData$GEOID), nchar(levelData$GEOID))
-        if(level != '150') 
-          df$block <- substr(levelData$GEOID, nchar(levelData$GEOID)-2, nchar(levelData$GEOID))
+        df <- df %>% dplyr::mutate(blockGroup = substr(.data$GEOID, nchar(.data$GEOID), nchar(.data$GEOID)))
+        if(level != '150') {
+          df <- df %>% dplyr::mutate(block = substr(.data$GEOID, nchar(.data$GEOID) - 2, nchar(.data$GEOID)))
+        }
       }
     }
 

diff --git a/R/merge_names.R b/R/merge_names.R
@@ -50,11 +50,12 @@
 #' @import stringr
 #'
 #' @examples
+#' \donttest{
 #' data(voters)
-#' merge_names(voters)
-#'
+#' merge_names(voters, 'last, first')
+#'}
 #' @export
-merge_names <- function(voter.file, namesToUse, clean.names = T) {
+merge_names <- function(voter.file, namesToUse, clean.names = TRUE) {
 
   # check the names
   if(namesToUse == 'last') {
@@ -214,7 +215,6 @@ merge_names <- function(voter.file, namesToUse, clean.names = T) {
 
 
   ## For unmatched names, just fill with a 1
-  library(dplyr)
   warning(paste(paste(sum(is.na(df$p_whi_last)), " (", round(100*mean(is.na(df$p_whi_last)), 1), "%) indivduals' last names were not matched.", sep = "")))
   if(grepl('first', namesToUse)) {
     warning(paste(paste(sum(is.na(df$p_whi_first)), " (", round(100*mean(is.na(df$p_whi_first)), 1), "%) indivduals' first names were not matched.", sep = "")))
@@ -224,7 +224,7 @@ merge_names <- function(voter.file, namesToUse, clean.names = T) {
   }
 
   for(i in grep("p_", names(df))) {
-    df[,i] <- coalesce(df[,i], 1)
+    df[,i] <- dplyr::coalesce(df[,i], 1)
   }
 
   # return the data

diff --git a/R/predict_race_v2.R b/R/predict_race_v2.R
@@ -21,7 +21,7 @@
 #' County is three characters (e.g., \code{"031"} not \code{"31"}),
 #' tract is six characters, and block is four characters. Place is five characters.
 #' See below for other optional fields.
-#' #' @param namesToUse A character vector identifying which names to use for the prediction.
+#' @param namesToUse A character vector identifying which names to use for the prediction.
 #' The default value is \code{"last"}, indicating that only the last name will be used.
 #' Other options are \code{"last, first"}, indicating that both last and first names will be
 #' used, and \code{"last, first, middle"}, indicating that last, first, and middle names will all
@@ -56,6 +56,8 @@
 #'  \code{\var{pred.asi}} for Asian/Pacific Islander, and
 #'  \code{\var{pred.oth}} for Other/Mixed.
 #'
+#' @export
+#'
 #' @examples
 #' data(voters)
 #' predict_race(voters, surname.only = TRUE)
@@ -69,9 +71,6 @@
 #' predict_race(voter.file = voters, census.geo = "tract", census.data = CensusObj2, age = T, sex = T)}
 #' \dontrun{CensusObj3 <- get_census_data(key = "...", state = c("NY", "DC", "NJ"), census.geo = "place");
 #' predict_race(voter.file = voters, census.geo = "place", census.data = CensusObj3)}
-#' @export
-
-## Race Prediction Function
 predict_race_new <- function(voter.file, namesToUse = 'last', census.geo, census.key,
                          census.data = NA, retry = 0) {
 

diff --git a/R/voters.R b/R/voters.R
@@ -17,10 +17,12 @@
 #'   \item{sex}{0=male, 1=female}
 #'   \item{party}{Party registration (character)}
 #'   \item{PID}{Party registration (numeric)}
-#'   #' }
-#' @docType data
+#'   \item{first}{First name}
+#'   \item{last}{Last name}
+#'   }
+#'
 #' @keywords datasets
 #' @name voters
 #' @examples
 #' data(voters)
-"voters"
+NULL
diff --git a/README.md b/README.md
@@ -1,4 +1,5 @@
 # wru: Who Are You? Bayesian Prediction of Racial Category Using Surname and Geolocation [![Build Status](https://travis-ci.org/kosukeimai/wru.svg?branch=master)](https://travis-ci.org/kosukeimai/wru) [![CRAN_Status_Badge](https://www.r-pkg.org/badges/version-last-release/wru)](https://cran.r-project.org/package=wru) ![CRAN downloads](http://cranlogs.r-pkg.org/badges/grand-total/wru)
+[![R-CMD-check](https://github.com/kosukeimai/wru/workflows/R-CMD-check/badge.svg)](https://github.com/kosukeimai/wru/actions)
 
 <img src="wru.png" align="right" height="256" style="margin-left: 4px;"/>
 

diff --git a/data/first.RData b/data/first.RData
diff --git a/data/firstNameDict.rda b/data/firstNameDict.rda
diff --git a/data/last.RData b/data/last.RData
diff --git a/data/lastNameDict.rda b/data/lastNameDict.rda
diff --git a/data/mid.RData b/data/mid.RData
diff --git a/data/middleNameDict.rda b/data/middleNameDict.rda
diff --git a/data/surnames2000.RData b/data/surnames2000.RData
diff --git a/data/surnames2000.rda b/data/surnames2000.rda
diff --git a/data/surnames2010.RData b/data/surnames2010.RData
diff --git a/data/surnames2010.rda b/data/surnames2010.rda
diff --git a/data/voters.RData b/data/voters.RData
diff --git a/data/voters.rda b/data/voters.rda
diff --git a/man/co_cluster.Rd b/man/co_cluster.Rd
diff --git a/man/firstNameDict.Rd b/man/firstNameDict.Rd
diff --git a/man/format_legacy_data.Rd b/man/format_legacy_data.Rd