-
Notifications
You must be signed in to change notification settings - Fork 0
/
dataRead.R
52 lines (41 loc) · 2.38 KB
/
dataRead.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# Script to download all the data for the project from the EPA AQS website.
library(rvest)

# Scrape the AQS download page and collect the href of every <a> link on it;
# the file links we want are later selected from this table by pattern.
pg <- read_html("https://aqs.epa.gov/aqsweb/airdata/download_files.html")
# stringsAsFactors = FALSE keeps the URLs as character strings — on R < 4.0
# the default would silently turn them into factors and break paste()/string
# handling downstream.
links <- data.frame(
  url = html_attr(html_nodes(pg, "a"), "href"),
  stringsAsFactors = FALSE
)
# Patterns identifying the 2018 hourly/daily files we need (wind, temperature,
# ozone 44201, SO2 42401, CO 42101, NO2 42602, PM2.5 88101, PM10 81102, plus
# the daily AQI-by-county summaries). Order here fixes the download order.
patterns <- c(
  "hourly_WIND_2018", "hourly_TEMP_2018", "hourly_44201_2018",
  "hourly_42401_2018", "hourly_42101_2018", "hourly_42602_2018",
  "hourly_88101_2018", "hourly_81102_2018", "daily_aqi_by_county",
  "daily_44201_2018", "daily_42401_2018", "daily_42101_2018",
  "daily_42602_2018", "daily_88101_2018", "daily_81102_2018"
)
# One grep per pattern (value = TRUE returns the matching file names directly)
# instead of fifteen copy-pasted rbind(grep(...)) calls that grow the data
# frame one block at a time.
d_urls <- data.frame(
  file_name = unlist(lapply(patterns, function(p) {
    grep(p, links$url, value = TRUE)
  })),
  stringsAsFactors = FALSE
)
# Full download URL for each file.
d_urls$url_name <- paste0("https://aqs.epa.gov/aqsweb/airdata/", d_urls$file_name)
# Temporarily silence warnings while downloading (restored at the end).
oldw <- getOption("warn")
options(warn = -1)
# Create the "data" folder in the current directory if it does not exist.
# (Plain if: the original used ifelse() on a scalar, which is an anti-pattern.)
if (!dir.exists("data")) {
  dir.create("data")
}
setwd(paste0(getwd(), "/data"))
# Download each file that is not already present locally, so the script can
# be re-run without re-fetching everything.
for (row in seq_len(nrow(d_urls))) {
  link <- d_urls[row, "url_name"]
  if (!file.exists(basename(link))) {
    print(basename(link))
    print("File not found")
    # mode = "wb" is required for binary .zip files — without it downloads
    # are corrupted on Windows. A failed download is reported, not fatal.
    tryCatch(
      download.file(link, basename(link), method = "libcurl", mode = "wb"),
      error = function(e) print(paste(link, 'did not work out'))
    )
  }
}
options(warn = oldw)
# Unzip every downloaded archive, then return to the project root.
# "\\.zip$" anchors the match to a literal ".zip" suffix; the original
# pattern ".*.zip" treated the dot as a wildcard and was unanchored, so it
# could match unintended file names (e.g. "archive.zip.bak").
files <- list.files(pattern = "\\.zip$", full.names = TRUE, recursive = FALSE)
# lapply + invisible: this loop is run purely for its extraction side effect,
# so suppress the (noisy) list of extracted file names sapply would print.
invisible(lapply(files, unzip))
setwd("../")