forked from CIDA-CSPH/CIDAtools
-
Notifications
You must be signed in to change notification settings - Fork 0
/
backup_project.R
176 lines (146 loc) · 6.11 KB
/
backup_project.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
#'Backup Project Directory
#'
#'This function backs up a CIDA project to the shared (P) CIDA drive. The backup directory
#'can either be existing (in which only changed files/folders are updated), or
#'nonexisting, in which case a full project backup is created.
#'
#'@param path_from Path from where the folders should be copied (project
#' directory location).
#'@param path_to Path to where the folders should be copied (P drive, only used
#' if specified).
#'@param exclude files/folders NOT to be backed up to the P-drive (useful for
#'larger files that don't change often). Currently not used.
#'@param recreate should backup be created from the ground up?
#' (can take longer, but useful for projects with many changes)
#' @param data_only should only subdirs including "data" (DataRaw/ and DataProcessed/) be backed up?
#' @param readme forces backup of project readme
#' @return This function has verbose output to ensure the back up is working, and
#' ultimately returns a success indicator that's returned by file.copy.
#'
#'@export
backup_project <- function(path_from = getwd(),
path_to = NULL,
exclude = c(".DS_Store", ".Rproj.user", ".git"),
recreate = FALSE,
data_only = TRUE,
readme = TRUE) {
# Check args, make into absolute paths
path_from <- normalizePath(path_from)
# Get proper path to Shared drive
if(missing(path_to)) {
path_to <- ProjectLocation()
if(path_to == "")
stop("Please first set project location, e.g., CIDAtools::SetProjectLocation('Branches/EmergencyMedicine/ThisProject')")
if(!dir.exists(CIDAtools::CIDA_drive_path()))
stop("Please ensure the CIDA drive is mounted, or set `path_to`")
}
path_to <- normalizePath(path_to)
## Check if specific project folder exists on P drive
backup_path <- file.path(path_to)
if(!dir.exists(backup_path)) {
message("Note: '", backup_path, "' not found; directory was created.")
dir.create(backup_path)
} else if (!recreate){
message("Note: backup path already exists, and will be updated unless cancelled.")
message("Backup path:\n", backup_path,
"\nProject path:\n",
path_from)
} else {
message("Note: backup path already exists, and will be completely overwritten since recreate == TRUE.")
message("Backup path:\n", backup_path,
"\nProject path:\n",
path_from, "\n\nType 'yes' to confirm.")
delete_old <- readline()
if(delete_old != "yes")
stop("Cancelled")
unlink(backup_path, recursive=TRUE)
dir.create(backup_path)
}
message("\nDetermining current backup situation...")
files_to_copy <-
list.files(path_from, recursive = T, all.files = T)
dirs_to_copy <- list.dirs(path_from, recursive = T, full.names = F)[-1]
if(length(exclude)) {
files_to_exclude <- c(
unlist(sapply(exclude[dir.exists(exclude)], list.files, recursive = TRUE,
all = TRUE, full.names = T)),
exclude[!dir.exists(exclude)])
files_to_copy <- files_to_copy[!(files_to_copy %in% files_to_exclude)]
dirs_to_exclude <- c(
unlist(sapply(exclude[dir.exists(exclude)], list.dirs, recursive = TRUE,
full.names = T)))
dirs_to_copy <- dirs_to_copy[!(dirs_to_copy %in% dirs_to_exclude)]
}
if(data_only) {
string_matches <- "dataraw|dataprocessed"
if(readme)
string_matches <- "readme|dataraw|dataprocessed"
# find large files (>= 250 MB)
large_idx <- file.size(files_to_copy)/1e6 >= 250
# find file matches
file_matches <- grepl(string_matches, files_to_copy, ignore.case = TRUE)
dir_matches <- grepl(string_matches, dirs_to_copy, ignore.case = TRUE)
files_to_copy <- files_to_copy[large_idx | file_matches]
dirs_to_copy <- dirs_to_copy[large_idx | dir_matches]
}
## Check if any files can be ignored using time last modified time
check <- file.exists(file.path(backup_path, files_to_copy))
if(any(check)) {
to_mtime <- file.mtime(file.path(backup_path, files_to_copy))
# If no file found, set last modified time into future (kind of a hack)
to_mtime[is.na(to_mtime)] <- Sys.time() +500
from_mtime <- file.mtime(file.path(path_from, files_to_copy))
files_to_copy <- files_to_copy[abs(difftime(to_mtime, from_mtime, units = "secs")) > 1]
}
# Check and don't copy dirs if they already exist
dirs_to_copy <- dirs_to_copy[!dir.exists(file.path(backup_path, dirs_to_copy))]
message("\nI'm about to create or update ",length(dirs_to_copy)," subdirectories and ",
length(files_to_copy), " files.",
"'\nType 'yes' to confirm, or 'list' to list changes.")
val <- readline()
if(val == "list") {
cat("Subdirs:", dirs_to_copy, sep = "\n")
cat("\n\nFiles:", files_to_copy, sep = "\n")
message("'\n\n Type 'yes' to confirm.")
val <- readline()
}
stopifnot(val == "yes")
if(length(dirs_to_copy)) {
message("Creating ", length(dirs_to_copy)," subdirectories...")
pb <- dplyr::progress_estimated(length(dirs_to_copy))
r1 <- sapply(1:length(dirs_to_copy), function(i) {
pb$tick()$print()
dir.create(file.path(backup_path, dirs_to_copy[i]))
})
} else
r1 <- T
if(length(files_to_copy)) {
message("\nCopying/updating ", length(files_to_copy), " files...")
pb <- dplyr::progress_estimated(length(files_to_copy))
r2 <- sapply(1:length(files_to_copy), function(i) {
pb$tick()$print()
file.copy(file.path(path_from, files_to_copy[i]),
file.path(backup_path, files_to_copy[i]),
overwrite = TRUE, copy.date = TRUE)
})
} else
r2 <- T
result <- all(r1) & all(r2)
create_backup_info(backup_path)
return(invisible(result))
}
create_backup_info <- function(path) {
fileConn<- file(file.path(path, "backup_info.md"))
lines <- c(
"This is a backup of the actual project directory. ",
"",
"DO NOT EDIT THIS DIRECTORY.",
"",
"If you do, changes may be overwritten by future backups.",
"",
paste0("This directory was last backed up at ", Sys.time()),
""
)
writeLines(lines, fileConn)
close(fileConn)
}