-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathNoFixedPlaceofWork.R
296 lines (237 loc) · 12.1 KB
/
NoFixedPlaceofWork.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
library(sp)
library(rgdal)
library(rgeos)
library(dplyr)
library(reshape2)
library(stringi)
library(maptools)
library(spatialEco)
library(progress)
#' Set working directory
wd <- setwd("c:/personal/r")
###############################################################################
# This algorithm takes the ELF by Place of Residence that has no Fixed Place of
# work and assigns it to the Quads using the following logic:
# + Assign Tom's estimates of workers with NFPoW to the respective Quads within a CSD
# + For each NAICS, use the skim matrix to tag all Qauds within a user speficied travel time
# + One travel time threshold is used for Construction and another for non-Construction
# as construction workers can travel longer distances
# + Once the candidate Quads are tagged using distance, only those Quads are kept that have
# a corresspoding NAICS employment. This is done to focus the temp POW.
# + With the final Quads selected, they are dissolved to form a continous or discontinous polygon
# + Then tht total number employment by NAICS is randomly smeared across the above polygon
# + A point in polygon is then peformed to tabulate the temp POW by QUad and NAICS
# + Given that such temp employment could visit two/three job sites in a day, this process is
# repeated three times to establish the three different daily locations these workers
# can be found in.
##############################################################################
#' Function for setting NAs
f_na <- function(df) {
# This function is used to set all NA values to zero in a dataframe.
df[is.na(df)] <- 0
return(df)
}
#' function for expanding dataframes
exp_f <- function(df, field){
# This function cleans and expandsd the dataframe while sequentially
# adding in a unique ID
temp <- subset(df, df[[field]] != 0, select = c("CSDUID", "ID", field))
temp <- temp[rep(seq(nrow(temp)), temp[[field]]), 1:2]
return(temp)
}
###############################################################################
#' User defined thresholds
const_val <- 120
other_val <- 60
# fraction that defines the proportion of construction vs other employment.
# user can change this to accommodate different sensitivities, if desired
c_frac <- 0.3
o_frac <- 0.7
###############################################################################
#' Bring in the population file from PopSYn3_Allocation.R. The number of persons
#' per QuadZone will be used as a surrogate for employed labor force. This is
#' preferred as opposed to allocating the 2011 NHS's POR by NAICS2 data to the
#' Quad zones prior to undertaking the analysis.
#'
#' Bring in a clipped QuadTree file. This is important if better accuracy is
#' desired. Because without clipping the QUadTree to the Ontario boundary, a
#' number of Quad Centroids fall outside Ontario thus cannot be assigned to a
#' CSD, which results in missing values for NFPW employment.
#'
#' Also batch in the new files that Tom produced that show the list of employment
#' by NFPoW by CSD and another one by NAICS. Note, Tom did not have a file that
#' combined the two so some code is needed to create a file that has
#' CSD by NAICS for NFPoW.
#'
#' Finally, we need the skim matrix to tag zones within a certain travel time
#' Personss by QT file
person_qt <- read.csv("person_qt.csv", stringsAsFactors = FALSE)
#' Skim matrix
skim <- read.csv("mf100.csv", stringsAsFactors = FALSE, nrows = 6495,
header = TRUE) %>% .[1:6496] # limit the matrix to Quad
#' Tom's files
csd_nfp <- read.csv("Ontario EPOR by POW.csv", stringsAsFactors = FALSE)
naics_nfp <- read.csv("Ontario EPOR and EPOW by Industry.csv", stringsAsFactors = FALSE)
naics_nfp <- transform(naics_nfp, frac = NFPW/sum(naics_nfp$NFPW))
#' batch in the QuadTree and CSD shapefiles and strip out any
#' unnecessary fields
qt <- "QuadTree_clipped"
csd <- "CSD"
cs <- csd_poly@data
write.csv(cs, "csd_ggh.csv")
qt_poly <- readOGR(wd, qt)
q_df <- qt_poly@data
csd_poly <- readOGR(wd, csd) %>% .[, c("CSDUID", "CSDNAME", "GGH")]
# Check if the projection systems for the two Quad Tree and CSD are the same.
# If not the same, then return error and stop()
crs_qt_poly <- proj4string(qt_poly)
crs_csd_poly <- proj4string(csd_poly)
# check if the projections are the same
{
if (crs_qt_poly != crs_csd_poly)
stop("Projection systems are not the same.
Please make them same before proceeding")
}
###############################################################################
# transfer CSD ID to Quad Tree. First get the IDs of the QuadTree for these
# will be used to attach to the centroids of the Quads
qt_id <- qt_poly@data %>% .[-2]
qt_cen = gCentroid(qt_poly, byid = TRUE) %>% SpatialPointsDataFrame(., qt_id)
writeOGR(qt_cen, layer = paste0("QTclipped_cen"), wd,
driver="ESRI Shapefile", overwrite_layer=T )
#' transfer CSD info to each QT centroid
qt_poly1 <- point.in.poly(qt_cen, csd_poly)
qt_poly1_df <- qt_poly1@data %>% subset(., select = c("ID", "CSDUID"))
###############################################################################
#' group persons by Quad. This will act as the surrogate for existing ELF at POR
per_qt <- person_qt %>% group_by(ID) %>% summarise(Per = n()) %>%
merge(., qt_poly1_df, by = "ID", all.x = TRUE) %>% # transfer CSDID
merge(., csd_nfp, by.x = "CSDUID", by.y = "CSD", all.x = TRUE) %>% # transfer 'Total', 'NFPW'
subset(., select = c("CSDUID", "ID", "Per", "NFPW")) %>% na.omit(.)
#' get total jobs in CSDUID and then split the NFPW based on the proportion of
#' each NAICS2
sum <- per_qt %>% group_by(CSDUID) %>% summarise(TotPersons = sum(Per),
TotNFPW = min(NFPW))
per_qt_sum <- merge(per_qt, sum, by = "CSDUID", all.x = TRUE) %>%
transform(., NFPW_Jobs = round((Per/TotPersons)*NFPW),0) %>% na.omit(.) %>%
subset(., NFPW_Jobs != 0)
#' build queries for populating construction and other NFPW
per_qt_sum1 <- transform(per_qt_sum, ConstNFPW =
ifelse(round((NFPW_Jobs*c_frac),0) < 1, NFPW_Jobs,
round((NFPW_Jobs*c_frac),0))) %>%
transform(., OthNFPW = NFPW_Jobs - ConstNFPW) %>% .[order(.$ID), ]
###############################################################################
#' Get the skims ready for batching in. There are rownames but no column names
#' in the current matrix. This will change as we batch in the skims.311 file.
#' For the time being this code block is written to work with the skims that I
#' had currently available with me.
q_names <- skim[1] # get copy of first column as this is the rownames
# fix column names
skim1 <- skim
colnames(skim1)[2:6496] <- q_names[,1]
#' subset dataframe to all those Quad zones that have NFPW and then convert to
#' long format. Also get rid of trip interchanges with a travel time of greater
#' than 120 mins. These are not considered candidates
selectedRows <- skim1$p.q..val. %in% per_qt_sum$ID
skim2 <- skim1[selectedRows,]
#' melt and subset
skim2_long <- melt (skim2, id.vars = "p.q..val.") %>%
setNames(., c("Origin", "Dest", "Ttime")) %>%
subset(., Ttime < const_val) %>% .[order(.$Origin), ]
write.csv(skim2_long, "skims_long.csv")
###############################################################################
#' Expand the data that is to be assigned. This is needed as each record will get
#' appended to a randomly generated point
#' Create two **unexpanded** dataframes; one for Other and one for Const NFPW
other <- subset(per_qt_sum1, OthNFPW != 0, select = c("ID", "OthNFPW")) %>%
.[order(.$ID),]
const <- subset(per_qt_sum1, ConstNFPW != 0, select = c("ID", "ConstNFPW")) %>%
.[order(.$ID),]
#' create two **expanded** dataframes; one for Other and one for Const NFPW
other_df <- exp_f(per_qt_sum1, "OthNFPW") %>%
transform(., UID = seq(1, length.out = nrow(.))) %>% .[order(.$ID), ]
#' the UID for the construction dataframe needs to be renumbered based on the numbers
const_df <- exp_f(per_qt_sum1, "ConstNFPW") %>%
transform(., UID = seq(nrow(other_df)+1, length.out = sum(per_qt_sum1$ConstNFPW))) %>%
.[order(.$ID), ]
###############################################################################
###############################################################################
############################ BEGIN ALLOCATION #################################
#' Get set of Other and Construction NFPW jobs for ***FIRST Quad Zone***
#' We are using a function for doing the needful - first_f
first_f <- function(num, data, val, data_df){
#' This function creates the first of the random points.
#' Num - the QuadZone id; should start with 1 as that is the first row
#' data - unexpanded dataframe of the CONST or OTHER NFPW
#' val - travltime value defined by the user at the beginning of script
#' data_df - exanded dataframe of the CONST or OTHER NFPW
# get the first quad
first_quad <- data$ID[num]
# select skims that belong to First Quad Zone
orig_first <- subset(skim2_long, Origin == first_quad & Ttime <= val)
#' query for selecting destination Quad zones
dest_query <- qt_poly@data$ID %in% orig_first$Dest %>% cbind(., q_df) %>%
setNames(., c("cond", "ID")) %>%
subset(., cond == TRUE) %>%
subset(., select = "ID")
# merge the query to the database of the shapefile
qt_poly@data = data.frame(qt_poly@data,
dest_query[match(qt_poly@data$ID, dest_query$ID),])
#' Now subset the Quad Tree to only select those records that do not contain a NA.
#' This essentially will give only those Quads that were successfully joined.
qt_poly_i <- qt_poly[!is.na(qt_poly@data[[3]]),]
# to contain the dissolving ID
qt_poly_i@data$Diss <- first_quad
# dissolve QUads
region <- gUnaryUnion(qt_poly_i, id = qt_poly_i@data$Diss)
# get spatial dataframe of points
nfpw_i <- subset(data_df, ID == first_quad)
# randomly assign the points
sp <- spsample(region, nrow(nfpw_i), type = "random", iter = 6)
#' convert to spatial points data frame
SPDF = SpatialPointsDataFrame(sp, nfpw_i)
#' reset the third column of the Quad Zones to 0.
qt_poly@data[3] <- 0
return(SPDF)
}
#' use the function to generate the first set Quad zone points for Other
#' and Const NFPW
other_first <- first_f(1, other, other_val, other_df)
const_first <- first_f(1, const, const_val, const_df)
###############################################################################
#' Now enumerate over the remaining Quad zones. First Other and then Const NFPW
#' generate progress bar
pb <- winProgressBar(title="Example progress bar", label="0% done",
min=0, max=100, initial=0)
# get list of QuadZones for both Const and Other NFPW
enum_other <- as.list(other$ID)
enum_const <- as.list(const$ID)
#' This code block runs the random sample point generation procedure for the
#' rest of the Quad zones for Other NFPW
for (i in 2:length(enum_other)){
#' use function to populate each Quadzone catchment area
other_rem <- first_f(i, other, other_val, other_df)
#' cumulatively merge the points to the first quad zone
other_first <- spRbind(other_first, other_rem)
#' update progress bar
Sys.sleep(0.1) # slow down the code for illustration purposes
info <- sprintf("%d%% done", round((i/length(enum_other))*100))
setWinProgressBar(pb, i/length(enum_other)*100, label=info)
}
#' This code block runs the random sample point generation procedure for the
#' rest of the Quad zones for Construction NFPW
for (i in 2:length(enum_const)){
#' use function to populate each Quadzone catchment area
const_rem <- first_f(i, const, const_val, const_df)
#' cumulatively merge the points to the first quad zone
const_first <- spRbind(const_first, const_rem)
#' update progress bar
Sys.sleep(0.1) # slow down the code for illustration purposes
info <- sprintf("%d%% done", round((i/length(enum_const))*100))
setWinProgressBar(pb, i/length(enum_const)*100, label=info)
}
#' write shapefile
writeOGR(other_first, layer = paste0("NFPW_Other"), wd,
driver="ESRI Shapefile", overwrite_layer=T )
writeOGR(const_first, layer = paste0("Const_Other"), wd,
driver="ESRI Shapefile", overwrite_layer=T )