-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathc_data_download_collation.R
103 lines (99 loc) · 3.85 KB
/
c_data_download_collation.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# Source all helper functions used by this {targets} list from the
# pipeline's src/ directory (e.g. download_csvs_from_drive(),
# collate_csvs_from_drive(), combine_metadata_with_pulls(),
# baseline_QAQC_RS_data())
tar_source("c_data_download_collation/src/")
# Download and process GEE output from GLORIA/Maciel pulls -------------
# this pipeline collates all of the GEE output files for the GLORIA locations
# and for the Maciel et al filtered/matched dataset.
# prep folder structure: create the in/mid/out working directories.
# dir.create() warns (but succeeds as a no-op) when a directory already
# exists, so showWarnings = FALSE is the idiomatic way to make this
# re-runnable -- no blanket suppressWarnings() needed.
for (subdir in c("in", "mid", "out")) {
  dir.create(file.path("c_data_download_collation", subdir),
             showWarnings = FALSE)
}
# Targets list for downloading and collating the historical remote-sensing
# pulls. Each target's command block first names its upstream target
# (e.g. `a_collated_pts_to_csv`) so {targets} tracks the dependency, then
# calls the side-effecting helper.
c_data_download_collation <- list(
  # download the NW and CLP data from Google Drive
  tar_target(
    name = b_downloaded_historical_NW_CLP,
    command = {
      a_collated_pts_to_csv
      download_csvs_from_drive(drive_folder_name = paste0("LS-C2-SR-NW_CLP_Poly-Points-v",
                                                          Sys.getenv("nw_clp_pull_version_date")),
                               version_identifier = Sys.getenv("nw_clp_pull_version_date"))
    },
    packages = c("tidyverse", "googledrive"),
    # depend = TRUE forces re-download when the upstream target changes
    cue = tar_cue(depend = TRUE)
  ),
  # and do the same for the regional data
  tar_target(
    name = b_downloaded_historical_regional,
    command = {
      a_collated_pts_to_csv
      download_csvs_from_drive(drive_folder_name = paste0("LS-C2-SR-RegionalPoints-v",
                                                          Sys.getenv("regional_pull_version_date")),
                               version_identifier = Sys.getenv("regional_pull_version_date"))
    },
    packages = c("tidyverse", "googledrive")
  ),
  # and load/collate those data, with each type as a new feather file
  # first with the NW/CLP data
  tar_target(
    name = b_collated_historical_NW_CLP,
    command = {
      b_downloaded_historical_NW_CLP
      collate_csvs_from_drive(file_prefix = "NW-Poudre-Historical",
                              version_identifier = Sys.getenv("nw_clp_pull_version_date"))
    },
    packages = c("tidyverse", "feather")
  ),
  # and now for the regional data
  tar_target(
    name = b_collated_historical_regional,
    command = {
      b_downloaded_historical_regional
      collate_csvs_from_drive(file_prefix = "NW-Poudre-Regional",
                              version_identifier = Sys.getenv("regional_pull_version_date"))
    },
    packages = c("tidyverse", "feather")
  ),
  # now, add metadata to tabular summaries and break out the DSWE 1/3 data
  # first for the regional data
  tar_target(
    name = b_combined_regional_metadata_data,
    command = {
      b_collated_historical_regional
      combine_metadata_with_pulls(file_prefix = "NW-Poudre-Regional",
                                  version_identifier = Sys.getenv("regional_pull_version_date"),
                                  collation_identifier = Sys.getenv("collation_date"))
    },
    packages = c("tidyverse", "feather")
  ),
  # and then for the NW/CLP data
  tar_target(
    name = b_combined_NW_CLP_metadata_data,
    command = {
      b_collated_historical_NW_CLP
      combine_metadata_with_pulls(file_prefix = "NW-Poudre-Historical",
                                  version_identifier = Sys.getenv("nw_clp_pull_version_date"),
                                  collation_identifier = Sys.getenv("collation_date"))
    },
    packages = c("tidyverse", "feather")
  ),
  # make a list of the collated files to branch over
  tar_target(
    name = b_collated_files,
    command = {
      b_combined_NW_CLP_metadata_data
      b_combined_regional_metadata_data
      # NOTE(review): this lists 'b_historical_RS_data_collation/out/' while
      # every other path in this file uses the 'c_data_download_collation/'
      # prefix -- confirm the collation helpers actually write to the
      # b_* directory, otherwise this target will branch over zero files.
      list.files('b_historical_RS_data_collation/out/',
                 full.names = TRUE,
                 pattern = Sys.getenv("collation_date")) %>%
        .[grepl('collated', .)]
    }
  ),
  # pass the QAQC filter over each of the listed files, creating filtered
  # files; pattern = map() dynamically branches one QAQC run per file
  tar_target(
    name = b_QAQC_filtered_data,
    command = baseline_QAQC_RS_data(filepath = b_collated_files),
    packages = c("tidyverse", "feather"),
    pattern = map(b_collated_files)
  )
)
)