Skip to content

Commit

Permalink
Raw data processed
Browse files Browse the repository at this point in the history
This updates `process_raw_data.R` to process all data in `root/data-raw/results_csv`. Some work is still needed to properly update the database at `.data/chemo-dash.sqlite`
  • Loading branch information
johnsonra committed Nov 30, 2023
1 parent 9c422b8 commit 0d5dcaa
Show file tree
Hide file tree
Showing 3 changed files with 197 additions and 146 deletions.
29 changes: 15 additions & 14 deletions R/process_experiments.R
Original file line number Diff line number Diff line change
Expand Up @@ -80,13 +80,12 @@ process_experiments <- function(experiment, source_dir, results_dir, seed = NULL
gsub(pattern = '.csv', replacement = '', fixed = TRUE) %>%
grep(pattern = '^[0-9]$', invert = TRUE, value = TRUE) %>% # drop any single digit numbers
grep(pattern = 'CH[1-6]', invert = TRUE, value = TRUE) %>% # drop channel
grep(pattern = 'fMLF|Basal|Buffer|C5a|SDF|IL.|LTB4', ignore.case = TRUE, invert = TRUE, value = TRUE) %>% # drop attractant
grep(pattern = 'I8RA', invert = TRUE, value = TRUE)}[1]), # catch a typo

grep(pattern = 'fMLF|Basal|Buffer|C5a|SDF|IL.|LTB4', ignore.case = TRUE, invert = TRUE, value = TRUE)}[1]), # drop attractant

treatment = map_chr(dat, ~
{.x %>%
gsub(pattern = '.csv', replacement = '', fixed = TRUE) %>%
grep(.x, pattern = 'fMLF|Basal|Buffer|C5a|SDF|IL.|LTB4|I8RA',
grep(.x, pattern = 'fMLF|Basal|Buffer|C5a|SDF|IL.|LTB4',
ignore.case = TRUE, value = TRUE)}[1])) %>%

mutate(sample = tolower(sample)) %>% # inconsistent capitalization
Expand All @@ -107,8 +106,7 @@ process_experiments <- function(experiment, source_dir, results_dir, seed = NULL
left_join(results_meta, by = 'f') %>%

# pull experiment name
mutate(experiment = map_chr(f, ~ (strsplit(.x, '_CH', fixed = TRUE)[[1]][1]) %>%
gsub(pattern = '_', replacement = '', fixed = TRUE))) %>%
mutate(experiment = map_chr(f, ~ str_sub(.x, start = 1, end = 8))) %>%

filter(!is.na(X) & !is.na(Y))

Expand All @@ -117,17 +115,17 @@ process_experiments <- function(experiment, source_dir, results_dir, seed = NULL
set.seed(seed)

retval <- map(experiment, ~ one_experiment(dat_sub = filter(dat, experiment == .x),
experiment = experiment,
experiment = .x,
results_dir = results_dir,
sig.figs = sig.figs,
ledge_dist = ledge_dist))

list(expSummary = map_df(retval[[1]]$expSummary, ~ .x),
expStats = map_df(retval[[1]]$expStats, ~ .x),
chanSummary = map_df(retval[[1]]$chanSummary, ~ .x),
chanRaw = map_df(retval[[1]]$chanRaw, ~ .x),
trackSummary = map_df(retval[[1]]$trackSummary, ~ .x),
trackRaw = map_df(retval[[1]]$trackRaw, ~ .x))
list(expSummary = map_df(retval, ~ .x$expSummary),
expStats = map_df(retval, ~ .x$expStats),
chanSummary = map_df(retval, ~ .x$chanSummary),
chanRaw = map_df(retval, ~ .x$chanRaw),
trackSummary = map_df(retval, ~ .x$trackSummary),
trackRaw = map_df(retval, ~ .x$trackRaw))
}


Expand Down Expand Up @@ -443,7 +441,8 @@ one_experiment <- function(dat_sub, experiment, results_dir, sig.figs = 4, ledge
mutate(ntrts = length(unique(treatment))) %>%

# drop any groups that only have one channel (nothing to compare)
dplyr::filter(ntrts > 1)
dplyr::filter(ntrts > 1,
!is.na(sample))

# if we have different between-treatment statistics to calculate...
if(nrow(btw_trt) > 0)
Expand Down Expand Up @@ -539,6 +538,8 @@ one_experiment <- function(dat_sub, experiment, results_dir, sig.figs = 4, ledge
treatment = '',
trt_a = '',
trt_b = '',
a = '',
b = '',
a_vs_b = list('')) %>%
filter(!is.na(date))
}
Expand Down
151 changes: 151 additions & 0 deletions data-raw/process_19000101_data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
# process_19000101_data.R

library(ChemotaxisDashboard)
library(magrittr)

# start up parallel back end
parallel::makeCluster(parallel::detectCores() - 1,
type = "PSOCK"
) %>%
doParallel::registerDoParallel()

# process test data
processed_data <- process_experiments('19000101',
source_dir = system.file("extdata", package = "ChemotaxisDashboard"),
results_dir = file.path(system('git rev-parse --show-toplevel', intern = TRUE), '.data'),
seed = 923847)



#####################################
# data.frames for RSQLite db tables #
#####################################

#' users
#' User table for authentication
#'
#' @param user Character, username - used in:
#' `access`
#' @param password Character, password
users <- data.frame(user = "shinyuser", # default user
password = "12345")


#' access
#' User access table defining which experiments the user can access
#'
#' @param user Character, maps to `users$user`
#' @param expID Character, maps to `expSummary$expID`
access <- data.frame(user = "shinyuser",
expID = "19000101") # default user has access to test data only


#' expSummary
#' Experiment summary table
#'
#' @param expID Character, experiment ID - used in:
#' `access`
#' `expStats`
#' `chanSummary`
#' `chanRaw`
#' `trackSummary`
#' `trackRaw`
#' @param tracks_time Character, path to figure of tracks over time for each channel
#' @param tracks_v Character, path to figure of velocity over time for each grouping
#' @param angle_migration Character, path to figure of angle of migration viloin plots for each channel
#' @param ce Character, path to figure of chemotactic efficiency violin plots for each channel
expSummary <- processed_data$expSummary


#' expStats
#' Table of summary statistics for each experiment
#'
#' @param expID Character, maps to `expSummary$expID`
#' @param within Character, group for the comparison (i.e. within normals treated with fMLF8)
#' @param between Character, contrast for the comparison (i.e. between channels 3 and 4)
#' @param test Character, test used to compare the `between` groups
#' @param stat Double, test statistics comparing the `between` groups
#' @param p Double, p-value for `stat`
expStats <- processed_data$expStats


#' chanSummary
#' Channel summary table
#'
#' @param expID Character, maps to `expSummary$expID`
#' @param chanID Integer, channel ID - used in:
#' `chanRaw`
#' `trackRaw`
#' `trackSummary`
#' @param sID Character, sample ID - used in:
#' @param treatment Character, treatment applied to this channel
#' @param tot_finished Integer, Total number of cells that reached the bottom ledge
#' @param prop_finished Double, proportion of cells that reached the bottom ledge
#' @param ce_median Double, median chemotactic efficiency
#' @param ce_mean Double, mean chemotactic efficiency
#' @param ce_sd Double, standard deviation of chemotactic efficiency
#' @param angle_median Double, median angle of migration
#' @param angle_mean Double, mean angle of migration
#' @param angle_sd Double, standard deviation of angle of migration
#' @param max_v_median Double, median maximum velocity
#' @param max_v_mean Double, mean maximum velocity
#' @param max_v_sd Double, standard deviation of maximum velocity
#' @param dvud Double, dissimilarity score comparing directed and undirected trajectories
#' @param dvud_p Double, p-value for `dvud`
chanSummary <- processed_data$chanSummary


#' chanRaw
#' Table of smoothed trajectories over all tracks in a channel
#'
#' @param expID Character, maps to `expSummary$expID`
#' @param chanID Integer, maps to `chanSummary$chanID`
#' @param x Double, smoothed x-position for the channel
#' @param y Double, smoothed y-position for the channel
#' @param frames Integer, frame (sampled every 30 seconds)
#' @param v_x Double, velocity in the x direction (undirected)
#' @param v_y Double, velocity in the y direction (directed)
#' @param v Double, total velocity
chanRaw <- processed_data$chanRaw


#' trackSummary
#' Track summary table
#'
#' @param expID Character, maps to `expSummary$expID`
#' @param chanID Integer, maps to `chanSummary$chanID`
#' @param trackID Integer, track ID - used in:
#' `trackRaw`
#' @param ce Double, chemotactic efficiency
#' @param angle_migration Double, angle of migration
#' @param max_v Double, maximum velocity in μm per minute
#' @param av_velocity Double, mean velocity in μm per minute
#' @param finished Logical, TRUE when the cell passed the bottom ledge
trackSummary <- processed_data$trackSummary


#' trackRaw
#' Raw track information
#'
#' @param expID Character, maps to `expSummary$expID`
#' @param chanID Integer, maps to `chanSummary$chanID`
#' @param trackID Integer, maps to `trackSummary$trackID`
#' @param x Double, x-position for the track
#' @param y Double, y-position for the track
#' @param frames Integer, frame (sampled every 30 seconds)
#' @param v_x Double, velocity in the x direction (undirected)
#' @param v_y Double, velocity in the y direction (directed)
#' @param v Double, total velocity
trackRaw <- processed_data$trackRaw


# export for internal use
usethis::use_data(users,
access,
expSummary,
expStats,
chanSummary,
chanRaw,
trackSummary,
trackRaw,
internal = TRUE, overwrite = TRUE)
Loading

0 comments on commit 0d5dcaa

Please sign in to comment.