Skip to content

Commit

Permalink
Compress data, add documentation.
Browse files Browse the repository at this point in the history
  • Loading branch information
elinw committed Oct 29, 2020
1 parent 7ecb022 commit 9f7f344
Show file tree
Hide file tree
Showing 5 changed files with 178 additions and 2 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,8 @@ Imports: ggplot2,
tidyr
Suggests:
knitr,
learnr,
rmarkdown,
testthat (>= 2.0.0)
sf
VignetteBuilder:
knitr
Roxygen: list(markdown = TRUE)
Expand All @@ -44,3 +43,4 @@ Collate:
"salem_village.R"
"tax_comparison.R"
"parris_social.R"
"salem_region.R"
50 changes: 50 additions & 0 deletions R/salem_region.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#' Simple Features data for Salem
#'
#' A dataset containing map information for the three counties
#' represented in the accused_witches data set (not including present-day
#' Maine locations) in simple features format. It also includes associated
#' data on accusations. This data should _never_ be used to analyze accusations.
#' The data is based on modern town boundaries from the Massachusetts
#' Document Repository. Some simple cleaning has been done to facilitate
#' basic mapping. Specifically, the names Salem Town and Salem Village are used
#' rather than the modern Salem and Danvers. Additionally, a field, TOWN_LABEL
#' is attached to the largest area for each town or community.
#'
#' Complex analyses require the sf package, but simple mapping can be
#' done using ggplot2.
#'
#' FIPS refers to the Federal Information Processing Standard.
#'
#' @format A data frame with 437 rows and 28 variables:
#' \describe{
#' \item{TOWN}{Name of town in upper case letters}
#' \item{TOWN_ID}{ID of town}
#' \item{TYPE}{Type of municipality C = City or T = Town}
#' \item{ISLAND}{Indicator for islands. Note that many towns have multiple polygons}
#' \item{COASTAL_PO}{YES for ocean polygon within town limits, NO otherwise}
#' \item{FOURCOLOR}{Codes allowing shading of all towns with 4 symbols}
#' \item{FIPS_STCO}{FIPS for state and county}
#' \item{CCD_MCD}{US Census town code}
#' \item{FIPS_PLACE}{FIPS town code}
#' \item{FIPS_MCD}{FIPS State & County & Census Town Code concatenated}
#' \item{FIPS_COUNT}{FIPS County}
#' \item{ACRES}{Area in acres}
#' \item{SQUARE_MIL}{Area in square miles}
#' \item{SHAPE_Leng}{The perimeter in meters}
#' \item{SHAPE_Area}{The polygon area in square meters}
#' \item{n_accused}{Total accused}
#' \item{February}{Number accused in February}
#' \item{March}{Number accused in March}
#' \item{April}{Number accused in April}
#' \item{May}{Number accused in May}
#' \item{June}{Number accused in June}
#' \item{July}{Number accused in July}
#' \item{August}{Number accused in August}
#' \item{September}{Number accused in September}
#' \item{October}{Number accused in October}
#' \item{November}{Number accused in November}
#' \item{TOWN_LABEL}{Town name to be used for labelling}
#' \item{geometry}{Simple Features geometry}
#' }
#' @source \url{https://docs.digital.mass.gov/dataset/massgis-data-community-boundaries-towns-survey-points}
"salem_region"
Binary file modified data/salem_region.rda
Binary file not shown.
63 changes: 63 additions & 0 deletions man/salem_region.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

63 changes: 63 additions & 0 deletions rawdata/create_data_frames.R
Original file line number Diff line number Diff line change
Expand Up @@ -79,4 +79,67 @@ tax_comparison <- tax_comparison %>%

save(tax_comparison, file = "data/tax_comparison.rda")

# This section describes how the map file was created. However the raw data
# is not included in the package. This should be downloaded from
# https://docs.digital.mass.gov/dataset/massgis-data-community-boundaries-towns-survey-points
# using the TOWNSURVEYPOLY data.
# The sf package is required.
library(sf)
library(dplyr)
# Read the town survey polygons from the downloaded shape file.
MA_towns <- sf::st_read(
  "rawdata/towns/townssurvey_shp/townsurvey_poly/TOWNSSURVEY_POLY.shp"
)

# Keep just the three counties that contain towns found in the other data
# sets. Note that towns in present-day Maine will not be mapped here.
# The zero-distance buffer is needed because of an issue in the shape file.
salem_region <- MA_towns %>%
  dplyr::filter(FIPS_STCO %in% c(25009, 25017, 25025)) %>%
  sf::st_buffer(dist = 0)

# Change Danvers to "SALEM VILLAGE" and Salem to "SALEM TOWN" to match
# the names used in the other data sets.
salem_region$TOWN[salem_region$TOWN == "DANVERS"] <- "SALEM VILLAGE"
salem_region$TOWN[salem_region$TOWN == "SALEM"] <- "SALEM TOWN"

# Count the accused per town, keyed by the upper-cased residence so the
# names line up with the TOWN column of the map data.
accused_towns <- accused_witches %>%
  dplyr::group_by(TOWN = toupper(Residence)) %>%
  dplyr::summarize(n_accused = dplyr::n())

# Add the total number accused to each town. all = TRUE keeps rows that
# appear in only one of the two tables.
salem_region <- merge(
  x = salem_region,
  y = accused_towns,
  by = "TOWN",
  all = TRUE
)

# Add the total number of accused by month.
# pivot_wider() spreads the month names into one column per month, so a
# non-missing entry marks an accusation in that month; counting the
# non-missing entries per town gives the monthly totals.
accused_town_monthly <- accused_witches %>%
  tidyr::pivot_wider(
    id_cols = c(Accused.Witch, Residence),
    names_from = Month.of.Accusation.Name,
    values_from = Month.of.Accusation.Name
  ) %>%
  dplyr::group_by(TOWN = toupper(Residence)) %>%
  dplyr::summarize(
    dplyr::across(
      c(
        February, March, April, May, June,
        July, August, September, October, November
      ),
      function(month_col) sum(!is.na(month_col))
    )
  )

# Attach the monthly counts to the map data, keeping unmatched rows from
# both tables.
salem_region <- merge(
  x = salem_region,
  y = accused_town_monthly,
  by = "TOWN",
  all = TRUE
)

# Many towns have more than one row (one per polygon). Find the largest
# area recorded for each town so that a TOWN_LABEL can be attached to that
# row only.
salem_region_max <- as.data.frame(salem_region) %>%
  dplyr::group_by(TOWN) %>%
  dplyr::summarize(max_sqm = max(SQUARE_MIL))

salem_region <- merge(
  x = salem_region,
  y = salem_region_max,
  by = "TOWN",
  all = TRUE
)

# Rows holding the town's largest area get the town name as their label;
# every other row gets an empty label.
salem_region$TOWN_LABEL <- ifelse(
  salem_region$SQUARE_MIL == salem_region$max_sqm,
  salem_region$TOWN,
  ""
)

# Drop the twentieth century data (the POP* columns) together with the
# helper column, then write the finished data set.
salem_region <- dplyr::select(
  salem_region,
  -dplyr::starts_with("POP"),
  -max_sqm
)
save(salem_region, file = "data/salem_region.rda")

0 comments on commit 9f7f344

Please sign in to comment.