-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCheckPopSyn3.R
61 lines (44 loc) · 2.28 KB
/
CheckPopSyn3.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
library(dplyr)
library(ggplot2)
library(reshape2)
#############################################################################################
#' Title: "Check PopSyn3 performance against CSD totals"
#############################################################################################
# Point the session at the folder holding the PopSyn3 files; the relative
# file names below are resolved against it.
# Note: setwd() returns the directory that was current *before* the call, so
# `wd` holds the previous working directory, not "c:/personal/r".
wd <- setwd("c:/personal/r")
##############################################################################################
#' Load the PopSyn3 person and household outputs plus the control-total
#' inputs used to compare PopSyn3 accuracy.
#' Character columns are kept as plain strings (no factor conversion).
pop3 <- read.csv("synpop_person_with_required_fields.csv",
                 stringsAsFactors = FALSE)
hh3 <- read.csv("synpop_hh.csv", stringsAsFactors = FALSE)
# control totals file
csdin <- read.csv("tazData.csv", stringsAsFactors = FALSE)
##############################################################################################
#' Control totals: sum `tothh` and `totpop` within each `csduid`
csdin.sum <- summarise(
  group_by(csdin, csduid),
  hhin = sum(tothh),
  popin = sum(totpop)
)
#' PopSyn3 persons: sum the `finalweight` of the person records in each `taz`
pop3.sum <- summarise(
  group_by(pop3, taz),
  popl3 = sum(finalweight)
)
#' PopSyn3 households: sum the `finalweight` of the household records in
#' each `taz`
hh3.sum <- summarise(
  group_by(hh3, taz),
  hh3 = sum(finalweight)
)
#' Now join the data to gauge differences in PopSyn3 outputs and inputs.
#' Both merges are left joins (all.x = TRUE): every CSD in the control
#' summary is kept, and a CSD with no matching PopSyn3 row gets NA in the
#' PopSyn3 columns.
#' NOTE(review): the control key `csduid` is matched against the PopSyn3
#' `taz` field - this assumes `taz` carries CSD ids in these outputs; confirm.
#' The GGH area does not have any corresponding CSD IDs, so the rows with
#' csduid equal to 0 are removed.
joined <- merge(
  csdin.sum, pop3.sum,
  by.x = "csduid", by.y = "taz", all.x = TRUE
) %>%
  subset(., csduid != 0)
# now join the household data
# (TRUE is spelled out: `T` is an ordinary variable that can be reassigned)
joined <- merge(joined, hh3.sum, by.x = "csduid", by.y = "taz", all.x = TRUE)
##############################################################################################
#' estimate differences between input and output fields
#' The joins above keep every control CSD, so a CSD with no PopSyn3 records
#' arrives here with NA in `popl3` / `hh3`. Left as NA, those rows make the
#' differences NA and both totals below print NA. The sum of weights over
#' zero records is 0, so the missing synthesized totals are counted as 0.
joined$popl3[is.na(joined$popl3)] <- 0
joined$hh3[is.na(joined$hh3)] <- 0
# difference = control total - PopSyn3 total (positive: PopSyn3 is short)
joined$popdiff <- joined$popin - joined$popl3
joined$hhdiff <- joined$hhin - joined$hh3
# drop CSDs whose control population is zero
joined <- subset(joined, popin != 0)
# overall differences across the remaining CSDs
sum(joined$popdiff)
sum(joined$hhdiff)
# Household differences by CSD id: grey connecting line with red points
ggplot(data = joined, mapping = aes(x = csduid, y = hhdiff)) +
  geom_line(colour = "grey") +
  geom_point(colour = "red") +
  ggtitle("Household Differences (Input Households - PopSyn3 Households)")
# Population differences by CSD id, drawn the same way
ggplot(data = joined, mapping = aes(x = csduid, y = popdiff)) +
  geom_line(colour = "grey") +
  geom_point(colour = "red") +
  ggtitle("Population Differences (Input Population - PopSyn3 Population)")