Raw data processed

This updates `process_raw_data.R` to process all data in `root/data-raw/results_csv`. Some work is still needed to properly update the database at `.data/chemo-dash.sqlite`
IDSS-NIAID · Nov 30, 2023 · 0d5dcaa · 0d5dcaa
1 parent 9c422b8
commit 0d5dcaa
Show file tree

Hide file tree

Showing 3 changed files with 197 additions and 146 deletions.
diff --git a/R/process_experiments.R b/R/process_experiments.R
@@ -80,13 +80,12 @@ process_experiments <- function(experiment, source_dir, results_dir, seed = NULL
                            gsub(pattern = '.csv', replacement = '', fixed = TRUE) %>%
                            grep(pattern = '^[0-9]$', invert = TRUE, value = TRUE) %>% # drop any single digit numbers
                            grep(pattern = 'CH[1-6]', invert = TRUE, value = TRUE) %>% # drop channel
-                           grep(pattern = 'fMLF|Basal|Buffer|C5a|SDF|IL.|LTB4', ignore.case = TRUE, invert = TRUE, value = TRUE) %>% # drop attractant
-                           grep(pattern = 'I8RA', invert = TRUE, value = TRUE)}[1]), # catch a typo
-
+                           grep(pattern = 'fMLF|Basal|Buffer|C5a|SDF|IL.|LTB4', ignore.case = TRUE, invert = TRUE, value = TRUE)}[1]), # drop attractant
+
     treatment = map_chr(dat, ~
                           {.x %>%
                               gsub(pattern = '.csv', replacement = '', fixed = TRUE) %>%
-                              grep(.x, pattern = 'fMLF|Basal|Buffer|C5a|SDF|IL.|LTB4|I8RA',
+                              grep(.x, pattern = 'fMLF|Basal|Buffer|C5a|SDF|IL.|LTB4',
                                    ignore.case = TRUE, value = TRUE)}[1])) %>%
 
     mutate(sample = tolower(sample)) %>% # inconsistent capitalization
@@ -107,8 +106,7 @@ process_experiments <- function(experiment, source_dir, results_dir, seed = NULL
     left_join(results_meta, by = 'f') %>%
 
     # pull experiment name
-    mutate(experiment = map_chr(f, ~ (strsplit(.x, '_CH', fixed = TRUE)[[1]][1]) %>%
-                                  gsub(pattern = '_', replacement = '', fixed = TRUE))) %>%
+    mutate(experiment = map_chr(f, ~ str_sub(.x, start = 1, end = 8))) %>%
 
     filter(!is.na(X) & !is.na(Y))
 
@@ -117,17 +115,17 @@ process_experiments <- function(experiment, source_dir, results_dir, seed = NULL
   set.seed(seed)
 
   retval <- map(experiment, ~ one_experiment(dat_sub = filter(dat, experiment == .x),
-                                             experiment = experiment,
+                                             experiment = .x,
                                              results_dir = results_dir, 
                                              sig.figs = sig.figs,
                                              ledge_dist = ledge_dist))
 
-  list(expSummary   = map_df(retval[[1]]$expSummary, ~ .x),
-       expStats     = map_df(retval[[1]]$expStats, ~ .x),
-       chanSummary  = map_df(retval[[1]]$chanSummary, ~ .x),
-       chanRaw      = map_df(retval[[1]]$chanRaw, ~ .x),
-       trackSummary = map_df(retval[[1]]$trackSummary, ~ .x),
-       trackRaw     = map_df(retval[[1]]$trackRaw, ~ .x))
+  list(expSummary   = map_df(retval, ~ .x$expSummary),
+       expStats     = map_df(retval, ~ .x$expStats),
+       chanSummary  = map_df(retval, ~ .x$chanSummary),
+       chanRaw      = map_df(retval, ~ .x$chanRaw),
+       trackSummary = map_df(retval, ~ .x$trackSummary),
+       trackRaw     = map_df(retval, ~ .x$trackRaw))
 }
 
 
@@ -443,7 +441,8 @@ one_experiment <- function(dat_sub, experiment, results_dir, sig.figs = 4, ledge
     mutate(ntrts = length(unique(treatment))) %>%
 
     # drop any groups that only have one channel (nothing to compare)
-    dplyr::filter(ntrts > 1)
+    dplyr::filter(ntrts > 1,
+                  !is.na(sample))
 
   # if we have different between-treatment statistics to calculate...
   if(nrow(btw_trt) > 0)
@@ -539,6 +538,8 @@ one_experiment <- function(dat_sub, experiment, results_dir, sig.figs = 4, ledge
                        treatment = '',
                        trt_a = '',
                        trt_b = '',
+                       a = '',
+                       b = '',
                        a_vs_b = list('')) %>%
       filter(!is.na(date))
   }

diff --git a/data-raw/process_19000101_data.R b/data-raw/process_19000101_data.R
@@ -0,0 +1,151 @@
+# process_19000101_data.R
+
+library(ChemotaxisDashboard)
+library(magrittr)
+
+# start up parallel back end
+parallel::makeCluster(parallel::detectCores() - 1, 
+                      type = "PSOCK"
+                      ) %>%
+  doParallel::registerDoParallel()
+
+# process test data
+processed_data <- process_experiments('19000101',
+                                      source_dir = system.file("extdata", package = "ChemotaxisDashboard"),
+                                      results_dir = file.path(system('git rev-parse --show-toplevel', intern = TRUE), '.data'),
+                                      seed = 923847)
+
+
+
+#####################################
+# data.frames for RSQLite db tables #
+#####################################
+
+#' users
+#' User table for authentication
+#' 
+#' @param user Character, username - used in: 
+#'   `access`
+#' @param password Character, password
+users <- data.frame(user = "shinyuser", # default user
+                    password = "12345")
+
+
+#' access
+#' User access table defining which experiments the user can access
+#' 
+#' @param user Character, maps to `users$user`
+#' @param expID Character, maps to `expSummary$expID`
+access <- data.frame(user = "shinyuser",
+                     expID = "19000101") # default user has access to test data only
+
+
+#' expSummary
+#' Experiment summary table
+#' 
+#' @param expID Character, experiment ID - used in: 
+#'   `access`
+#'   `expStats`
+#'   `chanSummary`
+#'   `chanRaw`
+#'   `trackSummary`
+#'   `trackRaw`
+#' @param tracks_time Character, path to figure of tracks over time for each channel
+#' @param tracks_v Character, path to figure of velocity over time for each grouping
+#' @param angle_migration Character, path to figure of angle of migration viloin plots for each channel
+#' @param ce Character, path to figure of chemotactic efficiency violin plots for each channel
+expSummary <- processed_data$expSummary
+
+
+#' expStats
+#' Table of summary statistics for each experiment
+#' 
+#' @param expID Character, maps to `expSummary$expID`
+#' @param within Character, group for the comparison (i.e. within normals treated with fMLF8)
+#' @param between Character, contrast for the comparison (i.e. between channels 3 and 4)
+#' @param test Character, test used to compare the `between` groups
+#' @param stat Double, test statistics comparing the `between` groups
+#' @param p Double, p-value for `stat`
+expStats <- processed_data$expStats
+
+
+#' chanSummary
+#' Channel summary table
+#' 
+#' @param expID Character, maps to `expSummary$expID`
+#' @param chanID Integer, channel ID - used in:
+#'   `chanRaw`
+#'   `trackRaw`
+#'   `trackSummary`
+#' @param sID Character, sample ID - used in:
+#' @param treatment Character, treatment applied to this channel
+#' @param tot_finished Integer, Total number of cells that reached the bottom ledge
+#' @param prop_finished Double, proportion of cells that reached the bottom ledge
+#' @param ce_median Double, median chemotactic efficiency
+#' @param ce_mean Double, mean chemotactic efficiency
+#' @param ce_sd Double, standard deviation of chemotactic efficiency
+#' @param angle_median Double, median angle of migration
+#' @param angle_mean Double, mean angle of migration
+#' @param angle_sd Double, standard deviation of angle of migration
+#' @param max_v_median Double, median maximum velocity
+#' @param max_v_mean Double, mean maximum velocity
+#' @param max_v_sd Double, standard deviation of maximum velocity
+#' @param dvud Double, dissimilarity score comparing directed and undirected trajectories
+#' @param dvud_p Double, p-value for `dvud`
+chanSummary <- processed_data$chanSummary
+
+
+#' chanRaw
+#' Table of smoothed trajectories over all tracks in a channel
+#' 
+#' @param expID Character, maps to `expSummary$expID`
+#' @param chanID Integer, maps to `chanSummary$chanID`
+#' @param x Double, smoothed x-position for the channel
+#' @param y Double, smoothed y-position for the channel
+#' @param frames Integer, frame (sampled every 30 seconds)
+#' @param v_x Double, velocity in the x direction (undirected)
+#' @param v_y Double, velocity in the y direction (directed)
+#' @param v Double, total velocity
+chanRaw <- processed_data$chanRaw
+
+
+#' trackSummary
+#' Track summary table
+#' 
+#' @param expID Character, maps to `expSummary$expID`
+#' @param chanID Integer, maps to `chanSummary$chanID`
+#' @param trackID Integer, track ID - used in:
+#'   `trackRaw`
+#' @param ce Double, chemotactic efficiency
+#' @param angle_migration Double, angle of migration
+#' @param max_v Double, maximum velocity in μm per minute
+#' @param av_velocity Double, mean velocity in μm per minute
+#' @param finished  Logical, TRUE when the cell passed the bottom ledge
+trackSummary <- processed_data$trackSummary
+
+
+#' trackRaw
+#' Raw track information
+#' 
+#' @param expID Character, maps to `expSummary$expID`
+#' @param chanID Integer, maps to `chanSummary$chanID`
+#' @param trackID Integer, maps to `trackSummary$trackID`
+#' @param x Double, x-position for the track
+#' @param y Double, y-position for the track
+#' @param frames Integer, frame (sampled every 30 seconds)
+#' @param v_x Double, velocity in the x direction (undirected)
+#' @param v_y Double, velocity in the y direction (directed)
+#' @param v Double, total velocity
+trackRaw <- processed_data$trackRaw
+
+
+# export for internal use
+usethis::use_data(users,
+                  access,
+                  expSummary,
+                  expStats,
+                  chanSummary,
+                  chanRaw,
+                  trackSummary,
+                  trackRaw,
+                  internal = TRUE, overwrite = TRUE)