Skip to content

Commit

Permalink
Merge pull request #69 from Katsevich-Lab/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
Tim authored Dec 8, 2023
2 parents 686f273 + 89d7555 commit 60922bf
Show file tree
Hide file tree
Showing 24 changed files with 174 additions and 105 deletions.
5 changes: 3 additions & 2 deletions R/assign_grna_functions.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
assign_grnas_to_cells <- function(sceptre_object, print_progress, parallel) {
assign_grnas_to_cells <- function(sceptre_object, print_progress, parallel, n_processors, log_dir) {
# extract pieces from sceptre_object
grna_matrix <- sceptre_object@grna_matrix
grna_target_data_frame <- sceptre_object@grna_target_data_frame
Expand All @@ -13,7 +13,8 @@ assign_grnas_to_cells <- function(sceptre_object, print_progress, parallel) {
if (grna_assignment_method == "mixture") {
initial_assignment_list <- assign_grnas_to_cells_mixture(grna_matrix = grna_matrix, cell_covariate_data_frame = cell_covariate_data_frame,
grna_assignment_hyperparameters = grna_assignment_hyperparameters,
print_progress = print_progress, parallel = parallel)
print_progress = print_progress, parallel = parallel, n_processors = n_processors,
log_dir = log_dir)
}
if (grna_assignment_method == "thresholding") {
initial_assignment_list <- assign_grnas_to_cells_thresholding(grna_matrix = grna_matrix,
Expand Down
12 changes: 6 additions & 6 deletions R/aux_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -62,14 +62,14 @@ auto_compute_cell_covariates <- function(response_matrix, grna_matrix, extra_cov
}


partition_response_ids <- function(response_ids, parallel) {
partition_response_ids <- function(response_ids, parallel, n_processors) {
groups_set <- FALSE
if (parallel) {
n_cores <- parallel::detectCores(logical = FALSE)
if (length(response_ids) >= n_cores) {
if (identical(n_processors, "auto")) n_processors <- parallel::detectCores(logical = FALSE)
if (length(response_ids) >= 2 * n_processors) {
set.seed(4)
s <- sample(response_ids)
out <- split(s, cut(seq_along(s), n_cores, labels = paste0("group_", seq(1, n_cores))))
out <- split(s, cut(seq_along(s), n_processors, labels = paste0("group_", seq(1, n_processors))))
groups_set <- TRUE
}
}
Expand All @@ -78,8 +78,8 @@ partition_response_ids <- function(response_ids, parallel) {
}


get_log_dir <- function() {
log_dir <- paste0(tempdir(), "/sceptre_logs/")
get_log_dir <- function(log_dir) {
log_dir <- paste0(log_dir, "/sceptre_logs/")
if (!dir.exists(log_dir)) dir.create(log_dir, recursive = TRUE)
return(log_dir)
}
25 changes: 20 additions & 5 deletions R/check_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ check_set_analysis_parameters <- function(sceptre_object, formula_object, respon
}


check_assign_grna_inputs <- function(sceptre_object, assignment_method, hyperparameters) {
check_assign_grna_inputs <- function(sceptre_object, assignment_method, hyperparameters, n_processors) {
if (!(assignment_method %in% c("maximum", "mixture", "thresholding"))) {
stop("`assignment_method` must be `mixture`, `maximum`, or `thresholding`.")
}
Expand Down Expand Up @@ -244,6 +244,11 @@ check_assign_grna_inputs <- function(sceptre_object, assignment_method, hyperpar
stop("`probability_threshold` should be a numeric in the interval (0,1).")
}
}

# 3. check n_processors argument
if (!(identical(n_processors, "auto") || (is.numeric(n_processors) && n_processors >= 2))) {
stop("`n_processors` should be set to the string 'auto' or an integer greater than or equal to 2.")
}
return(NULL)
}

Expand All @@ -264,20 +269,25 @@ check_run_qc_inputs <- function(n_nonzero_trt_thresh, n_nonzero_cntrl_thresh, re
}


check_calibration_check_inputs <- function(sceptre_object, n_calibration_pairs) {
check_calibration_check_inputs <- function(sceptre_object, n_calibration_pairs, n_processors) {
grna_target_data_frame <- sceptre_object@grna_target_data_frame
control_group_complement <- sceptre_object@control_group_complement
n_nt_grnas <- grna_target_data_frame |>
dplyr::filter(grna_group == "non-targeting") |> nrow()
# 1. check number of gRNAS
if (!control_group_complement) {
if (n_nt_grnas < 2) stop("Two or more non-targeting gRNAs must be present to run the calibration check when using the NT cells as the control group. gRNAs that are non-targeting should be assigned a gRNA group label of 'non-targeting' in the `grna_target_data_frame`.")
} else {
if (n_nt_grnas < 1) stop("At least one non-targeting gRNA must be present to run the calibration check. gRNAs that are non-targeting should be assigned a gRNA group label of 'non-targeting' in the `grna_target_data_frame`.")
}

# 2. check number of calibration pairs
if (n_calibration_pairs == 0L) {
stop("Cannot run a calibration check on zero negative control pairs.")
}
# 3. check n_processors
if (!(identical(n_processors, "auto") || (is.numeric(n_processors) && n_processors >= 2))) {
stop("`n_processors` should be set to the string 'auto' or an integer greater than or equal to 2.")
}
return(NULL)
}

Expand All @@ -286,8 +296,8 @@ check_discovery_analysis_inputs <- function(response_grna_group_pairs,
control_group_complement,
grna_target_data_frame,
calibration_check_run,
pc_analysis,
calibration_result, n_ok_pairs) {
pc_analysis, calibration_result,
n_ok_pairs, n_processors) {
# 1. check that positive control pairs are available
if (nrow(response_grna_group_pairs) == 0L) {
stop(paste0(ifelse(pc_analysis, "Positive control", "Discovery"), " pairs have not been supplied. Thus, the ", ifelse(pc_analysis, "power check", "discovery analysis"), " cannot be run. You can supply ", ifelse(pc_analysis, "positive control", "discovery"), " pairs in the function set_analysis_parameters()."))
Expand All @@ -311,5 +321,10 @@ check_discovery_analysis_inputs <- function(response_grna_group_pairs,
stop("Zero pairs pass pairwise QC. Cannot run analysis.")
}

# 5. check n_processors
if (!(identical(n_processors, "auto") || (is.numeric(n_processors) && n_processors >= 2))) {
stop("`n_processors` should be set to the string 'auto' or an integer greater than or equal to 2.")
}

return(NULL)
}
25 changes: 13 additions & 12 deletions R/medium_level_functs_v2.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ run_perm_test_in_memory <- function(response_matrix, grna_assignments, covariate
synthetic_idxs, output_amount, fit_parametric_curve, B1, B2, B3, calibration_check,
control_group_complement, n_nonzero_trt_thresh, n_nonzero_cntrl_thresh,
side_code, low_moi, response_precomputations, cells_in_use, print_progress,
parallel, analysis_type) {
parallel, n_processors, log_dir, analysis_type) {
# 0. define several variables
subset_to_nt_cells <- calibration_check && !control_group_complement
run_outer_regression <- calibration_check || control_group_complement
Expand All @@ -34,7 +34,7 @@ run_perm_test_in_memory <- function(response_matrix, grna_assignments, covariate
# 2. define function to loop subset of response IDs
analyze_given_response_ids <- function(curr_response_ids, proc_id = NULL) {
if (parallel && print_progress) {
f_name <- paste0(get_log_dir(), analysis_type, "_", proc_id, ".out")
f_name <- paste0(get_log_dir(log_dir), analysis_type, "_", proc_id, ".out")
file.create(f_name) |> invisible()
} else {
f_name <- NULL
Expand Down Expand Up @@ -96,14 +96,15 @@ run_perm_test_in_memory <- function(response_matrix, grna_assignments, covariate
}

# 3. partition the response IDs
partitioned_response_ids <- partition_response_ids(response_ids = response_ids, parallel = parallel)
partitioned_response_ids <- partition_response_ids(response_ids = response_ids,
parallel = parallel, n_processors = n_processors)

# 4. run the analysis
if (!parallel) {
res <- lapply(partitioned_response_ids, analyze_given_response_ids)
} else {
cat(paste0("Running ", analysis_type, " in parallel. "))
if (print_progress) cat(paste0("Change directories to ", crayon::blue(get_log_dir()), " and view the files ", crayon::blue(paste0(analysis_type, "_*.out")), " for progress updates.\n"))
if (print_progress) cat(paste0("Change directories to ", crayon::blue(get_log_dir(log_dir)), " and view the files ", crayon::blue(paste0(analysis_type, "_*.out")), " for progress updates.\n"))
res <- parallel::mclapply(seq_along(partitioned_response_ids),
function(proc_id) analyze_given_response_ids(partitioned_response_ids[[proc_id]], proc_id),
mc.cores = length(partitioned_response_ids))
Expand All @@ -121,9 +122,9 @@ run_perm_test_in_memory <- function(response_matrix, grna_assignments, covariate

# core function 2: run crt in memory
run_crt_in_memory_v2 <- function(response_matrix, grna_assignments, covariate_matrix, response_grna_group_pairs,
output_amount, fit_parametric_curve, B1, B2, B3, calibration_check,
control_group_complement, n_nonzero_trt_thresh, n_nonzero_cntrl_thresh,
side_code, low_moi, response_precomputations, cells_in_use, print_progress, parallel, analysis_type) {
output_amount, fit_parametric_curve, B1, B2, B3, calibration_check, control_group_complement,
n_nonzero_trt_thresh, n_nonzero_cntrl_thresh, side_code, low_moi, response_precomputations,
cells_in_use, print_progress, parallel, n_processors, log_dir, analysis_type) {
# 0. define several variables
subset_to_nt_cells <- calibration_check && !control_group_complement
run_outer_regression <- calibration_check || control_group_complement
Expand All @@ -141,7 +142,7 @@ run_crt_in_memory_v2 <- function(response_matrix, grna_assignments, covariate_ma
# 2. define the run precomputation function
run_precomp_on_given_responses <- function(curr_response_ids, proc_id = NULL) {
if (parallel && print_progress) {
f_name <- paste0(get_log_dir(), analysis_type, "_", proc_id, ".out")
f_name <- paste0(get_log_dir(log_dir), analysis_type, "_", proc_id, ".out")
file.create(f_name) |> invisible()
} else {
f_name <- NULL
Expand Down Expand Up @@ -177,12 +178,12 @@ run_crt_in_memory_v2 <- function(response_matrix, grna_assignments, covariate_ma

# 5. update the response precomputations (if applicable)
if (run_outer_regression) {
partitioned_response_ids <- partition_response_ids(response_ids = response_ids, parallel = parallel)
partitioned_response_ids <- partition_response_ids(response_ids = response_ids, parallel = parallel, n_processors = n_processors)
if (!parallel) {
res <- lapply(partitioned_response_ids, run_precomp_on_given_responses)
} else {
cat(paste0("Running ", analysis_type, " in parallel. "))
if (print_progress) cat(paste0("Change directories to ", crayon::blue(get_log_dir()), " and view the files ", crayon::blue(paste0(analysis_type, "_*.out")), " for progress updates. "))
if (print_progress) cat(paste0("Change directories to ", crayon::blue(get_log_dir(log_dir)), " and view the files ", crayon::blue(paste0(analysis_type, "_*.out")), " for progress updates. "))
res <- parallel::mclapply(seq_along(partitioned_response_ids),
function(proc_id) run_precomp_on_given_responses(partitioned_response_ids[[proc_id]], proc_id),
mc.cores = length(partitioned_response_ids))
Expand All @@ -194,7 +195,7 @@ run_crt_in_memory_v2 <- function(response_matrix, grna_assignments, covariate_ma
# 6. define the function to analyze the pairs
perform_association_analysis <- function(curr_grna_groups, proc_id) {
result_out_list <- vector(mode = "list", length = length(curr_grna_groups))
f_name <- if (parallel && print_progress) paste0(get_log_dir(), analysis_type, "_", proc_id, ".out") else NULL
f_name <- if (parallel && print_progress) paste0(get_log_dir(log_dir), analysis_type, "_", proc_id, ".out") else NULL

for (grna_group_idx in seq_along(curr_grna_groups)) {
curr_grna_group <- get_id_from_idx(grna_group_idx, print_progress, curr_grna_groups,
Expand Down Expand Up @@ -224,7 +225,7 @@ run_crt_in_memory_v2 <- function(response_matrix, grna_assignments, covariate_ma

# 10. perform the association analyses
grna_groups <- unique(response_grna_group_pairs$grna_group)
partitioned_grna_group_ids <- partition_response_ids(response_ids = grna_groups, parallel = parallel)
partitioned_grna_group_ids <- partition_response_ids(response_ids = grna_groups, parallel = parallel, n_processors = n_processors)
if (!parallel) {
res <- lapply(partitioned_grna_group_ids, perform_association_analysis)
} else {
Expand Down
8 changes: 4 additions & 4 deletions R/mixture_model_functs.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
assign_grnas_to_cells_mixture <- function(grna_matrix, cell_covariate_data_frame, grna_assignment_hyperparameters, print_progress, parallel) {
assign_grnas_to_cells_mixture <- function(grna_matrix, cell_covariate_data_frame, grna_assignment_hyperparameters, print_progress, parallel, n_processors, log_dir) {
if (!parallel) cat(crayon::red("Note: Set `parallel = TRUE` in the function call to improve speed.\n\n"))
# 0. get random starting guesses for pi and g_pert
starting_guesses <- get_random_starting_guesses(n_em_rep = grna_assignment_hyperparameters$n_em_rep,
Expand All @@ -16,7 +16,7 @@ assign_grnas_to_cells_mixture <- function(grna_matrix, cell_covariate_data_frame
# 3. define the function to perform assignments for a set of grnas
analyze_given_grna_ids <- function(curr_grna_ids, proc_id = NULL) {
if (parallel && print_progress) {
f_name <- paste0(get_log_dir(), "assign_grnas_", proc_id, ".out")
f_name <- paste0(get_log_dir(log_dir), "assign_grnas_", proc_id, ".out")
file.create(f_name) |> invisible()
} else {
f_name <- NULL
Expand All @@ -42,10 +42,10 @@ assign_grnas_to_cells_mixture <- function(grna_matrix, cell_covariate_data_frame
} else {
cat("Running gRNA assignments in parallel. ")
if (print_progress) {
cat(paste0("Change directories to ", crayon::blue(get_log_dir()), " and view the files ",
cat(paste0("Change directories to ", crayon::blue(get_log_dir(log_dir)), " and view the files ",
crayon::blue("assign_grnas_*.out"), " for progress updates.\n"))
}
grna_ids_partitioned <- partition_response_ids(grna_ids, parallel)
grna_ids_partitioned <- partition_response_ids(grna_ids, parallel, n_processors)
res <- parallel::mclapply(seq_along(grna_ids_partitioned),
function(proc_id) analyze_given_grna_ids(grna_ids_partitioned[[proc_id]], proc_id),
mc.cores = length(grna_ids_partitioned))
Expand Down
34 changes: 16 additions & 18 deletions R/plotting_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,6 @@ plot_grna_count_distributions <- function(sceptre_object, n_grnas_to_plot = 4L,
#' @param grnas_to_plot (optional; default \code{NULL}) a character vector giving the names of specific gRNAs to plot; if \code{NULL}, then the gRNAs are chosen at random.
#' @param point_size (optional; default \code{0.9}) the size of the individual points in the plot
#' @param transparency (optional; default \code{0.8}) the transparency of the individual points in the plot
#' @param n_max_0_grna_unprtb_plot (optional; default \code{1000}) there may be many cells with a gRNA count of 0 in the unperturbed group. This can slow down the plotting without adding useful information, so at most \code{n_max_0_grna_unprtb_plot} points from this group are plotted. Setting this to \code{Inf} will guarantee no downsampling occurs.
#' @param return_indiv_plots (optional; default \code{FALSE}) if \code{FALSE}, then a list of \code{ggplot} objects is returned; if \code{TRUE} then a single \code{cowplot} object is returned.
#'
#' @return a single \code{cowplot} object containing the combined panels (if \code{return_indiv_plots} is set to \code{TRUE}) or a list of the individual panels (if \code{return_indiv_plots} is set to \code{FALSE})
Expand All @@ -139,7 +138,9 @@ plot_grna_count_distributions <- function(sceptre_object, n_grnas_to_plot = 4L,
#' # `plot_assign_grnas()` is dispatched when
#' # `plot()` is called on the `sceptre_object`
#' # in step 3 (the gRNA assignment step).
plot_assign_grnas <- function(sceptre_object, n_grnas_to_plot = 3L, grnas_to_plot = NULL, point_size = 0.9, transparency = 0.8, n_max_0_grna_unprtb_plot = 1000, return_indiv_plots = FALSE) {
plot_assign_grnas <- function(sceptre_object, n_grnas_to_plot = 3L, grnas_to_plot = NULL, point_size = 0.9, transparency = 0.8, return_indiv_plots = FALSE) {
n_points_to_plot_per_umi <- 1000
n_grnas_to_plot_panel_b <- 1000
if (!sceptre_object@functs_called["assign_grnas"]) {
stop("This `sceptre_object` has not yet had `assign_grnas` called on it.")
}
Expand All @@ -161,7 +162,7 @@ plot_assign_grnas <- function(sceptre_object, n_grnas_to_plot = 3L, grnas_to_plo
p = grna_matrix@p,
x = grna_matrix@x,
row_idx = which(grna_id == rownames(grna_matrix)),
n_cells = ncol(grna_matrix))
n_cells = ncol(grna_matrix)) |> as.integer()
df <- data.frame(g = g,
assignment = ifelse(assignment, "pert", "unpert") |> factor(),
grna_id = grna_id |> factor(),
Expand All @@ -171,14 +172,11 @@ plot_assign_grnas <- function(sceptre_object, n_grnas_to_plot = 3L, grnas_to_plo
return(df)
}) |> data.table::rbindlist()

# downsampling the unperturbed cells with 0 grna expression, if the number of those cells
# exceeds `n_max_0_grna_unprtb_plot`.
is_0_grna_and_unperturbed <- with(to_plot_a, assignment == "unperturbed" & g == 0)
if (sum(is_0_grna_and_unperturbed) > n_max_0_grna_unprtb_plot) {
idx_to_remove <- rownames(to_plot_a)[is_0_grna_and_unperturbed] |>
sample(sum(is_0_grna_and_unperturbed) - n_max_0_grna_unprtb_plot)
to_plot_a <- to_plot_a[! rownames(to_plot_a) %in% idx_to_remove, ]
}
# downsample the unperturbed cells
to_plot_a <- to_plot_a |>
dplyr::group_by(g) |>
dplyr::sample_n(size = min(n_points_to_plot_per_umi, dplyr::n())) |>
dplyr::ungroup()

# plot a
p_a <- ggplot2::ggplot(data = to_plot_a, mapping = ggplot2::aes(x = assignment, y = g, col = assignment)) +
Expand All @@ -198,7 +196,8 @@ plot_assign_grnas <- function(sceptre_object, n_grnas_to_plot = 3L, grnas_to_plo
to_plot_b <- data.frame(x = n_cells_per_grna,
y = names(n_cells_per_grna)) |>
dplyr::arrange(n_cells_per_grna) |>
dplyr::mutate(y = factor(y, labels = y, levels = y))
dplyr::mutate(y = factor(y, labels = y, levels = y)) |>
dplyr::sample_n(min(n_grnas_to_plot_panel_b, dplyr::n()))
p_b <- ggplot2::ggplot(data = to_plot_b,
mapping = ggplot2::aes(x = x, y = y)) +
ggplot2::geom_bar(stat = "identity", width = 0.5, fill = "grey90", col = "darkblue") +
Expand All @@ -218,12 +217,11 @@ plot_assign_grnas <- function(sceptre_object, n_grnas_to_plot = 3L, grnas_to_plo
} else {
n_grnas_per_cell <- sceptre_object@grnas_per_cell
moi <- mean(n_grnas_per_cell)
to_plot_c <- data.frame(x = n_grnas_per_cell)
p_c <- ggplot2::ggplot(data = to_plot_c, mapping = ggplot2::aes(x = x)) +
ggplot2::geom_histogram(binwidth = 1, fill = "grey90", color = "darkblue") +
ggplot2::scale_y_continuous(trans = "log1p",
expand = c(0, 0),
breaks = 10^(1:8)) +
p_c <- ggplot2::ggplot(data = data.frame(x = n_grnas_per_cell),
mapping = ggplot2::aes(x = x)) +
ggplot2::geom_histogram(binwidth = max(1, 0.02 * length(unique(n_grnas_per_cell))),
fill = "grey90", color = "darkblue") +
ggplot2::scale_y_continuous(expand = c(0, 0), trans = "log1p", breaks = 10^(1:8)) +
get_my_theme() + ggplot2::ylab("Frequency") +
ggplot2::ggtitle("N gRNAs per cell") +
ggplot2::xlab("N gRNAs") +
Expand Down
Loading

0 comments on commit 60922bf

Please sign in to comment.