-
Notifications
You must be signed in to change notification settings - Fork 0
/
server_utils.R
234 lines (215 loc) · 7.97 KB
/
server_utils.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
# The handwriterAppDemo R package performs writership analysis of handwritten
# documents. Copyright (C) 2024 Iowa State University of Science and Technology
# on behalf of its Center for Statistics and Applications in Forensic Evidence
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <https://www.gnu.org/licenses/>.
#' Copy Documents to Project Directory
#'
#' When the user selects documents the files are copied to main_dir > data >
#' model_docs or main_dir > data > questioned_docs so that all documents used in
#' the analysis are stored in the project folder.
#'
#' @param paths The filepaths of the writing samples
#' @param main_dir A filepath to the project folder
#' @param subfolder (optional) Subfolder name within data folder
#'
#' @return NULL
#'
#' @noRd
copy_docs_to_project <- function(paths, main_dir, subfolder = NULL){
names <- basename(paths)
if (!is.null(subfolder)){
lapply(1:length(paths), function(i) {
file.copy(paths[i], file.path(main_dir, "data", subfolder, names[i]))}
)
} else {
lapply(1:length(paths), function(i) {
file.copy(paths[i], file.path(main_dir, "data", names[i]))
})
}
}
#' Create a Directory
#'
#' This helper function creates a directory if it doesn't already exist.
#'
#' @param folder A filepath for the new directory
#'
#' @return NULL
#'
#' @noRd
create_dir <- function(folder){
if (!dir.exists(folder)){
dir.create(folder)
}
}
#' Delete the Demo Folder
#'
#' Delete the demo folder and its contents from the temporary directory.
#'
#' @return NULL
#'
#' @noRd
delete_demo_dir <- function() {
unlink(file.path(tempdir(), "demo"), recursive = TRUE)
}
#' List Handwriting Samples in Folder
#'
#' This helper function lists the documents in main_dir > data > model_docs or
#' main_dir > data > questioned_docs and returns the filenames in a vector.
#'
#' @param main_dir A filepath to the project folder
#' @param type Either "model" or "questioned"
#' @param filepaths TRUE to return the full filepaths or FALSE to return the filenames only
#'
#' @return List
#'
#' @noRd
list_docs <- function(main_dir, type = "model", filepaths = TRUE){
docs <- list.files(file.path(main_dir, "data", paste0(type, "_docs")), pattern = ".png", full.names = filepaths)
return(docs)
}
#' Get the Filenames of the Questioned Documents
#'
#' This helper function returns the full filepath of the questioned
#' document in main_dir > data > questioned_docs.
#'
#' @param main_dir A filepath to the project folder
#'
#' @return Filename
#'
#' @noRd
list_names_in_named_vector <- function(paths){
if (length(paths) > 0 ) {
# get names list where names are the filepaths and values are the filenames
names <- sapply(paths, basename)
# swap names and values so the names are the filenames and the values are the filepaths
names <- stats::setNames(names(names), names)
return(names)
}
}
#' Load Processed Document
#'
#' This helper function loads the document
#' processed with handwriter::processDocument
#'
#' @param main_dir A filepath to the project folder
#' @param name Filename of the document
#' @param type Either "model" or "questioned
#'
#' @return Processed document as a list
#'
#' @noRd
load_processed_doc <- function(main_dir, name, type){
# drop file extension from name
name <- stringr::str_replace(name, ".png", "")
graphs <- list.files(file.path(main_dir, "data", paste0(type, "_graphs")), pattern = ".rds", full.names = TRUE)
graphs <- graphs[grepl(name, graphs)]
graphs <- readRDS(graphs)
return(graphs)
}
#' Load Document
#'
#' This helper function loads a document as an image with the magick package.
#'
#' @param path The filepath for the document
#'
#' @return An image
#'
#' @noRd
load_image <- function(path){
if (!is.null(path) && file.exists(path)){
return(magick::image_read(path))
}
}
#' Make Dataframe of Posterior Probabilities
#'
#' This helper function formats the posterior probabilities of
#' writership stored in analysis calculated with
#' handwriter::analyze_questioned_document. The posterior probabilities
#' are placed in a dataframe with columns Known Writer and Posterior Probability
#' of Writership. The posterior probabilities are formatted as percentages.
#'
#' @param analysis analysis created with handwriter::analyze_questioned_document
#'
#' @return Processed document as a list
#'
#' @noRd
make_posteriors_df <- function(analysis){
# prevent note: "no visible binding for global variable"
post_probs <- NULL
df <- analysis$posterior_probabilities
# Format posterior probabilities as percentage
qd_columns <- colnames(df)[-1]
df <- df %>% tidyr::pivot_longer(cols = tidyr::all_of(qd_columns),
names_to = "qd",
values_to = "post_probs")
df <- df %>% dplyr::mutate(post_probs = paste0(100*post_probs, "%"))
df <- df %>% tidyr::pivot_wider(names_from = "qd", values_from = "post_probs")
# Change "known_writer" to "Known Writer"
colnames(df)[1] <- "Known Writer"
return(df)
}
#' Reset the App
#'
#' Reset the global reactive values.
#'
#' @param global
#' @param local
#'
#' @return NULL
#'
#' @noRd
reset_app <- function(global) {
global$analysis <- NULL
global$known_names <- NULL
global$known_paths <- NULL
global$main_dir <- NULL
global$model <- NULL
global$qd_names <- NULL
global$qd_paths <- NULL
}
#' Setup Main Directory
#'
#' This helper function creates directory called "data" inside the main directory.
#' Inside the "data" directory, "questioned_docs," "questioned_graphs," and "model_docs"
#' directories are created. The data object "template" is saved in the "data" directory
#' as template.rds. These directories and the template.rds file are required by the
#' handwriter package to analyze a questioned document.
#'
#' @param main_dir A filepath to the project folder
#'
#' @return Processed document as a list
#'
#' @noRd
setup_main_dir <- function(main_dir){
# create directory structure in main directory
create_dir(file.path(main_dir, "data"))
dirs <- c("model_docs", "model_graphs", "model_clusters", "questioned_docs", "questioned_graphs", "questioned_clusters")
lapply(dirs, function(d) create_dir(file.path(main_dir, "data", d)))
# handwriter requires template.rds to exist in main_dir > data
load("data/templateK40.rda")
saveRDS(templateK40, file.path(main_dir, "data", "template.rds"))
# copy model graphs and clusters to main_dir > data to save processing time
model_graphs <- list.files(file.path("data", "model_graphs"), full.names = TRUE)
model_clusters <- list.files(file.path("data", "model_clusters"), full.names = TRUE)
copy_docs_to_project(paths = model_graphs, main_dir = main_dir, subfolder = "model_graphs")
copy_docs_to_project(paths = model_clusters, main_dir = main_dir, subfolder = "model_clusters")
copy_docs_to_project(paths = file.path("data", "model_clusters.rds"), main_dir = main_dir, subfolder = NULL)
# copy questioned graphs and clusters to main_dir > data to save processing time
questioned_graphs <- list.files(file.path("data", "questioned_graphs"), full.names = TRUE)
questioned_clusters <- list.files(file.path("data", "questioned_clusters"), full.names = TRUE)
copy_docs_to_project(paths = questioned_graphs, main_dir = main_dir, subfolder = "questioned_graphs")
copy_docs_to_project(paths = questioned_clusters, main_dir = main_dir, subfolder = "questioned_clusters")
copy_docs_to_project(paths = file.path("data", "questioned_clusters.rds"), main_dir = main_dir, subfolder = NULL)
}