From e28718942ea114c1f84cf946d0f13f558ef6f92b Mon Sep 17 00:00:00 2001 From: Tim Date: Sun, 5 Nov 2023 15:11:32 -0500 Subject: [PATCH 1/9] add optional n_processors arg to assign_grnas --- R/assign_grna_functions.R | 4 ++-- R/aux_functions.R | 8 ++++---- R/check_functions.R | 7 ++++++- R/mixture_model_functs.R | 4 ++-- R/s4_analysis_functs_1.R | 7 ++++--- man/assign_grnas.Rd | 3 +++ 6 files changed, 21 insertions(+), 12 deletions(-) diff --git a/R/assign_grna_functions.R b/R/assign_grna_functions.R index 7f941072..af1dc635 100644 --- a/R/assign_grna_functions.R +++ b/R/assign_grna_functions.R @@ -1,4 +1,4 @@ -assign_grnas_to_cells <- function(sceptre_object, print_progress, parallel) { +assign_grnas_to_cells <- function(sceptre_object, print_progress, parallel, n_processors) { # extract pieces from sceptre_object grna_matrix <- sceptre_object@grna_matrix grna_target_data_frame <- sceptre_object@grna_target_data_frame @@ -13,7 +13,7 @@ assign_grnas_to_cells <- function(sceptre_object, print_progress, parallel) { if (grna_assignment_method == "mixture") { initial_assignment_list <- assign_grnas_to_cells_mixture(grna_matrix = grna_matrix, cell_covariate_data_frame = cell_covariate_data_frame, grna_assignment_hyperparameters = grna_assignment_hyperparameters, - print_progress = print_progress, parallel = parallel) + print_progress = print_progress, parallel = parallel, n_processors = n_processors) } if (grna_assignment_method == "thresholding") { initial_assignment_list <- assign_grnas_to_cells_thresholding(grna_matrix = grna_matrix, diff --git a/R/aux_functions.R b/R/aux_functions.R index a0af87a0..f5e7a1a8 100644 --- a/R/aux_functions.R +++ b/R/aux_functions.R @@ -62,14 +62,14 @@ auto_compute_cell_covariates <- function(response_matrix, grna_matrix, extra_cov } -partition_response_ids <- function(response_ids, parallel) { +partition_response_ids <- function(response_ids, parallel, n_processors) { groups_set <- FALSE if (parallel) { - n_cores <- parallel::detectCores(logical = FALSE) - if (length(response_ids) >= n_cores) { + if (identical(n_processors, "auto")) n_processors <- parallel::detectCores(logical = FALSE) + if (length(response_ids) >= 2 * n_processors) { set.seed(4) s <- sample(response_ids) - out <- split(s, cut(seq_along(s), n_cores, labels = paste0("group_", seq(1, n_cores)))) + out <- split(s, cut(seq_along(s), n_processors, labels = paste0("group_", seq(1, n_processors)))) groups_set <- TRUE } } diff --git a/R/check_functions.R b/R/check_functions.R index 4e88c38f..02fc3bd0 100644 --- a/R/check_functions.R +++ b/R/check_functions.R @@ -186,7 +186,7 @@ check_set_analysis_parameters <- function(sceptre_object, formula_object, respon } -check_assign_grna_inputs <- function(sceptre_object, assignment_method, hyperparameters) { +check_assign_grna_inputs <- function(sceptre_object, assignment_method, hyperparameters, n_processors) { if (!(assignment_method %in% c("maximum", "mixture", "thresholding"))) { stop("`assignment_method` must be `mixture`, `maximum`, or `thresholding`.") } @@ -244,6 +244,11 @@ check_assign_grna_inputs <- function(sceptre_object, assignment_method, hyperpar stop("`probability_threshold` should be a numeric in the interval (0,1).") } } + + # 3. check n_processors argument + if (!(identical(n_processors, "auto") || (is.numeric(n_processors) && n_processors >= 2))) { + stop("`n_processors` should be set to the string 'auto' or an integer greater than or equal to 2.") + } return(NULL) } diff --git a/R/mixture_model_functs.R b/R/mixture_model_functs.R index 674e80b8..046aa501 100644 --- a/R/mixture_model_functs.R +++ b/R/mixture_model_functs.R @@ -1,4 +1,4 @@ -assign_grnas_to_cells_mixture <- function(grna_matrix, cell_covariate_data_frame, grna_assignment_hyperparameters, print_progress, parallel) { +assign_grnas_to_cells_mixture <- function(grna_matrix, cell_covariate_data_frame, grna_assignment_hyperparameters, print_progress, parallel, n_processors) { if (!parallel) cat(crayon::red("Note: Set `parallel = TRUE` in the function call to improve speed.\n\n")) # 0. get random starting guesses for pi and g_pert starting_guesses <- get_random_starting_guesses(n_em_rep = grna_assignment_hyperparameters$n_em_rep, @@ -45,7 +45,7 @@ assign_grnas_to_cells_mixture <- function(grna_matrix, cell_covariate_data_frame cat(paste0("Change directories to ", crayon::blue(get_log_dir()), " and view the files ", crayon::blue("assign_grnas_*.out"), " for progress updates.\n")) } - grna_ids_partitioned <- partition_response_ids(grna_ids, parallel) + grna_ids_partitioned <- partition_response_ids(grna_ids, parallel, n_processors) res <- parallel::mclapply(seq_along(grna_ids_partitioned), function(proc_id) analyze_given_grna_ids(grna_ids_partitioned[[proc_id]], proc_id), mc.cores = length(grna_ids_partitioned)) diff --git a/R/s4_analysis_functs_1.R b/R/s4_analysis_functs_1.R index bef159ad..5f558304 100644 --- a/R/s4_analysis_functs_1.R +++ b/R/s4_analysis_functs_1.R @@ -154,13 +154,14 @@ set_analysis_parameters <- function(sceptre_object, #' @param method (optional) a string indicating the method to use to assign the gRNAs to cells, one of `"mixture"`, `"thresholding"`, or `"maximum"` #' @param print_progress (optional; default `TRUE`) a logical indicating whether to print progress updates #' @param parallel (optional; default `FALSE`) a logical indicating whether to run the function in parallel +#' @param n_processors (optional; default "auto") an integer specifying the number of processors to use if `parallel` is set to `TRUE`. The default, "auto," automatically detects the number of processors available on the machine. #' @param ... optional method-specific additional arguments #' #' @return an updated `sceptre_object` in which the gRNA assignments have been carried out #' @export #' @examples #' # see example via ?sceptre -assign_grnas <- function(sceptre_object, method = "default", print_progress = TRUE, parallel = FALSE, ...) { +assign_grnas <- function(sceptre_object, method = "default", print_progress = TRUE, parallel = FALSE, n_processors = "auto", ...) { # 0. verify that function called in correct order sceptre_object <- perform_status_check_and_update(sceptre_object, "assign_grnas") @@ -185,7 +186,7 @@ assign_grnas <- function(sceptre_object, method = "default", print_progress = TR hyperparameters <- hyperparameters_default # 2. check inputs - check_assign_grna_inputs(sceptre_object, method, hyperparameters) |> invisible() + check_assign_grna_inputs(sceptre_object, method, hyperparameters, n_processors) |> invisible() # 3. determine whether to reset response precomputations reset_response_precomps <- sceptre_object@low_moi && @@ -198,7 +199,7 @@ assign_grnas <- function(sceptre_object, method = "default", print_progress = TR sceptre_object@grna_assignment_hyperparameters <- hyperparameters # 5. assign the grnas - sceptre_object <- assign_grnas_to_cells(sceptre_object, print_progress, parallel) + sceptre_object <- assign_grnas_to_cells(sceptre_object, print_progress, parallel, n_processors) # return return(sceptre_object) diff --git a/man/assign_grnas.Rd b/man/assign_grnas.Rd index dfcccfbb..72d627eb 100644 --- a/man/assign_grnas.Rd +++ b/man/assign_grnas.Rd @@ -9,6 +9,7 @@ assign_grnas( method = "default", print_progress = TRUE, parallel = FALSE, + n_processors = "auto", ... ) } @@ -21,6 +22,8 @@ assign_grnas( \item{parallel}{(optional; default \code{FALSE}) a logical indicating whether to run the function in parallel} +\item{n_processors}{(optional; default "auto") an integer specifying the number of processors to use if \code{parallel} is set to \code{TRUE}. The default, "auto," automatically detects the number of processors available on the machine.} + \item{...}{optional method-specific additional arguments} } \value{ From 093b2d5b1a370608e3f878a0fe0288a62db2c36e Mon Sep 17 00:00:00 2001 From: Tim Date: Mon, 6 Nov 2023 11:27:50 -0500 Subject: [PATCH 2/9] add n_processors argument to de_functions --- R/check_functions.R | 18 +++++++++--- R/medium_level_functs_v2.R | 15 +++++----- R/s4_analysis_functs_2.R | 33 ++++++++++++++-------- docs/reference/assign_grnas.html | 9 ++++-- docs/reference/import_data_from_parse.html | 4 +-- docs/reference/index.html | 4 +-- docs/reference/run_calibration_check.html | 11 ++++++-- docs/reference/run_discovery_analysis.html | 11 ++++++-- docs/reference/run_power_check.html | 11 ++++++-- man/run_calibration_check.Rd | 5 +++- man/run_discovery_analysis.Rd | 5 +++- man/run_power_check.Rd | 5 +++- 12 files changed, 90 insertions(+), 41 deletions(-) diff --git a/R/check_functions.R b/R/check_functions.R index 02fc3bd0..c9478540 100644 --- a/R/check_functions.R +++ b/R/check_functions.R @@ -269,20 +269,25 @@ check_run_qc_inputs <- function(n_nonzero_trt_thresh, n_nonzero_cntrl_thresh, re } -check_calibration_check_inputs <- function(sceptre_object, n_calibration_pairs) { +check_calibration_check_inputs <- function(sceptre_object, n_calibration_pairs, n_processors) { grna_target_data_frame <- sceptre_object@grna_target_data_frame control_group_complement <- sceptre_object@control_group_complement n_nt_grnas <- grna_target_data_frame |> dplyr::filter(grna_group == "non-targeting") |> nrow() + # 1. check number of gRNAS if (!control_group_complement) { if (n_nt_grnas < 2) stop("Two or more non-targeting gRNAs must be present to run the calibration check when using the NT cells as the control group. gRNAs that are non-targeting should be assigned a gRNA group label of 'non-targeting' in the `grna_target_data_frame`.") } else { if (n_nt_grnas < 1) stop("At least one non-targeting gRNA must be present to run the calibration check. gRNAs that are non-targeting should be assigned a gRNA group label of 'non-targeting' in the `grna_target_data_frame`.") } - + # 2. check number of calibration pairs if (n_calibration_pairs == 0L) { stop("Cannot run a calibration check on zero negative control pairs.") } + # 3. check n_processors + if (!(identical(n_processors, "auto") || (is.numeric(n_processors) && n_processors >= 2))) { + stop("`n_processors` should be set to the string 'auto' or an integer greater than or equal to 2.") + } return(NULL) } @@ -291,8 +296,8 @@ check_discovery_analysis_inputs <- function(response_grna_group_pairs, control_group_complement, grna_target_data_frame, calibration_check_run, - pc_analysis, - calibration_result, n_ok_pairs) { + pc_analysis, calibration_result, + n_ok_pairs, n_processors) { # 1. check that positive control pairs are available if (nrow(response_grna_group_pairs) == 0L) { stop(paste0(ifelse(pc_analysis, "Positive control", "Discovery"), " pairs have not been supplied. Thus, the ", ifelse(pc_analysis, "power check", "discovery analysis"), " cannot be run. You can supply ", ifelse(pc_analysis, "positive control", "discovery"), " pairs in the function set_analysis_parameters().")) @@ -316,5 +321,10 @@ check_discovery_analysis_inputs <- function(response_grna_group_pairs, stop("Zero pairs pass pairwise QC. Cannot run analysis.") } + # 5. check n_processors + if (!(identical(n_processors, "auto") || (is.numeric(n_processors) && n_processors >= 2))) { + stop("`n_processors` should be set to the string 'auto' or an integer greater than or equal to 2.") + } + return(NULL) } diff --git a/R/medium_level_functs_v2.R b/R/medium_level_functs_v2.R index 0f39742f..1c311896 100644 --- a/R/medium_level_functs_v2.R +++ b/R/medium_level_functs_v2.R @@ -16,7 +16,7 @@ run_perm_test_in_memory <- function(response_matrix, grna_assignments, covariate synthetic_idxs, output_amount, fit_parametric_curve, B1, B2, B3, calibration_check, control_group_complement, n_nonzero_trt_thresh, n_nonzero_cntrl_thresh, side_code, low_moi, response_precomputations, cells_in_use, print_progress, - parallel, analysis_type) { + parallel, n_processors, analysis_type) { # 0. define several variables subset_to_nt_cells <- calibration_check && !control_group_complement run_outer_regression <- calibration_check || control_group_complement @@ -96,7 +96,8 @@ run_perm_test_in_memory <- function(response_matrix, grna_assignments, covariate } # 3. partition the response IDs - partitioned_response_ids <- partition_response_ids(response_ids = response_ids, parallel = parallel) + partitioned_response_ids <- partition_response_ids(response_ids = response_ids, + parallel = parallel, n_processors = n_processors) # 4. run the analysis if (!parallel) { @@ -121,9 +122,9 @@ run_perm_test_in_memory <- function(response_matrix, grna_assignments, covariate # core function 2: run crt in memory run_crt_in_memory_v2 <- function(response_matrix, grna_assignments, covariate_matrix, response_grna_group_pairs, - output_amount, fit_parametric_curve, B1, B2, B3, calibration_check, - control_group_complement, n_nonzero_trt_thresh, n_nonzero_cntrl_thresh, - side_code, low_moi, response_precomputations, cells_in_use, print_progress, parallel, analysis_type) { + output_amount, fit_parametric_curve, B1, B2, B3, calibration_check, control_group_complement, + n_nonzero_trt_thresh, n_nonzero_cntrl_thresh, side_code, low_moi, response_precomputations, + cells_in_use, print_progress, parallel, n_processors, analysis_type) { # 0. define several variables subset_to_nt_cells <- calibration_check && !control_group_complement run_outer_regression <- calibration_check || control_group_complement @@ -177,7 +178,7 @@ run_crt_in_memory_v2 <- function(response_matrix, grna_assignments, covariate_ma # 5. update the response precomputations (if applicable) if (run_outer_regression) { - partitioned_response_ids <- partition_response_ids(response_ids = response_ids, parallel = parallel) + partitioned_response_ids <- partition_response_ids(response_ids = response_ids, parallel = parallel, n_processors = n_processors) if (!parallel) { res <- lapply(partitioned_response_ids, run_precomp_on_given_responses) } else { @@ -224,7 +225,7 @@ run_crt_in_memory_v2 <- function(response_matrix, grna_assignments, covariate_ma # 10. perform the association analyses grna_groups <- unique(response_grna_group_pairs$grna_group) - partitioned_grna_group_ids <- partition_response_ids(response_ids = grna_groups, parallel = parallel) + partitioned_grna_group_ids <- partition_response_ids(response_ids = grna_groups, parallel = parallel, n_processors = n_processors) if (!parallel) { res <- lapply(partitioned_grna_group_ids, perform_association_analysis) } else { diff --git a/R/s4_analysis_functs_2.R b/R/s4_analysis_functs_2.R index 6bed4695..8d80803b 100644 --- a/R/s4_analysis_functs_2.R +++ b/R/s4_analysis_functs_2.R @@ -8,13 +8,15 @@ #' @param calibration_group_size (optional) the number of negative control gRNAs to put into each negative control target #' @param print_progress (optional; default `TRUE`) a logical indicating whether to print progress updates #' @param parallel (optional; default `FALSE`) a logical indicating whether to run the function in parallel +#' @param n_processors (optional; default "auto") an integer specifying the number of processors to use if `parallel` is set to `TRUE`. The default, "auto," automatically detects the number of processors available on the machine. #' @return an updated `sceptre_object` in which the calibration check has been carried out #' #' @export #' @examples #' # see example via ?sceptre run_calibration_check <- function(sceptre_object, output_amount = 1, n_calibration_pairs = NULL, - calibration_group_size = NULL, print_progress = TRUE, parallel = FALSE) { + calibration_group_size = NULL, print_progress = TRUE, parallel = FALSE, + n_processors = "auto") { # 0. advance function (if necessary), and check function call sceptre_object <- skip_assign_grnas_and_run_qc(sceptre_object, parallel) sceptre_object <- perform_status_check_and_update(sceptre_object, "run_calibration_check") @@ -25,7 +27,7 @@ run_calibration_check <- function(sceptre_object, output_amount = 1, n_calibrati if (is.null(n_calibration_pairs)) n_calibration_pairs <- sceptre_object@n_ok_discovery_pairs # 2. check inputs - check_calibration_check_inputs(sceptre_object, n_calibration_pairs) |> invisible() + check_calibration_check_inputs(sceptre_object, n_calibration_pairs, n_processors) |> invisible() # 3. construct the negative control pairs response_grna_group_pairs <- construct_negative_control_pairs_v2(sceptre_object = sceptre_object, @@ -43,7 +45,8 @@ run_calibration_check <- function(sceptre_object, output_amount = 1, n_calibrati analysis_type = "calibration_check", output_amount = output_amount, print_progress = print_progress, - parallel = parallel) + parallel = parallel, + n_processors = n_processors) # 6. update fields of sceptre object with results sceptre_object@calibration_result <- out$result |> apply_grouping_to_result(sceptre_object, TRUE) |> @@ -63,12 +66,13 @@ run_calibration_check <- function(sceptre_object, output_amount = 1, n_calibrati #' @param output_amount (optional; default `1`) an integer taking values 1, 2, or 3 specifying the amount of information to return #' @param print_progress (optional; default `TRUE`) a logical indicating whether to print progress updates #' @param parallel (optional; default `FALSE`) a logical indicating whether to run the function in parallel +#' @param n_processors (optional; default "auto") an integer specifying the number of processors to use if `parallel` is set to `TRUE`. The default, "auto," automatically detects the number of processors available on the machine. #' @return an updated `sceptre_object` in which the power check has been carried out #' #' @export #' @examples #' # see example via ?sceptre -run_power_check <- function(sceptre_object, output_amount = 1, print_progress = TRUE, parallel = FALSE) { +run_power_check <- function(sceptre_object, output_amount = 1, print_progress = TRUE, parallel = FALSE, n_processors = "auto") { # 0. verify that function called in correct order sceptre_object <- skip_assign_grnas_and_run_qc(sceptre_object, parallel) sceptre_object <- perform_status_check_and_update(sceptre_object, "run_power_check") @@ -83,7 +87,8 @@ run_power_check <- function(sceptre_object, output_amount = 1, print_progress = grna_target_data_frame = sceptre_object@grna_target_data_frame, pc_analysis = TRUE, calibration_result = sceptre_object@calibration_result, - n_ok_pairs = sceptre_object@n_ok_positive_control_pairs) |> invisible() + n_ok_pairs = sceptre_object@n_ok_positive_control_pairs, + n_processors = n_processors) |> invisible() # 3. run the sceptre analysis (high-level function call) out <- run_sceptre_analysis_high_level(sceptre_object = sceptre_object, @@ -92,7 +97,8 @@ run_power_check <- function(sceptre_object, output_amount = 1, print_progress = analysis_type = "power_check", output_amount = output_amount, print_progress = print_progress, - parallel = parallel) + parallel = parallel, + n_processors = n_processors) # 4. update fields of sceptre object with results sceptre_object@power_result <- out$result |> apply_grouping_to_result(sceptre_object) @@ -109,12 +115,13 @@ run_power_check <- function(sceptre_object, output_amount = 1, print_progress = #' @param output_amount (optional; default `1`) an integer taking values 1, 2, or 3 specifying the amount of information to return #' @param print_progress (optional; default `TRUE`) a logical indicating whether to print progress updates #' @param parallel (optional; default `FALSE`) a logical indicating whether to run the function in parallel +#' @param n_processors (optional; default "auto") an integer specifying the number of processors to use if `parallel` is set to `TRUE`. The default, "auto," automatically detects the number of processors available on the machine. #' @return an updated `sceptre_object` in which the discovery analysis has been carried out #' #' @export #' @examples #' # see example via ?sceptre -run_discovery_analysis <- function(sceptre_object, output_amount = 1, print_progress = TRUE, parallel = FALSE) { +run_discovery_analysis <- function(sceptre_object, output_amount = 1, print_progress = TRUE, parallel = FALSE, n_processors = "auto") { # 0. verify that function called in correct order sceptre_object <- skip_assign_grnas_and_run_qc(sceptre_object, parallel) sceptre_object <- perform_status_check_and_update(sceptre_object, "run_discovery_analysis") @@ -129,7 +136,8 @@ run_discovery_analysis <- function(sceptre_object, output_amount = 1, print_prog grna_target_data_frame = sceptre_object@grna_target_data_frame, pc_analysis = FALSE, calibration_result = sceptre_object@calibration_result, - n_ok_pairs = sceptre_object@n_ok_discovery_pairs) |> invisible() + n_ok_pairs = sceptre_object@n_ok_discovery_pairs, + n_processors = n_processors) |> invisible() # 3. run the sceptre analysis (high-level function call) out <- run_sceptre_analysis_high_level(sceptre_object = sceptre_object, @@ -138,7 +146,8 @@ run_discovery_analysis <- function(sceptre_object, output_amount = 1, print_prog analysis_type = "discovery_analysis", output_amount = output_amount, print_progress = print_progress, - parallel = parallel) + parallel = parallel, + n_processors = n_processors) # 4. update fields of sceptre object with results sceptre_object@discovery_result <- out$result |> apply_grouping_to_result(sceptre_object) |> @@ -149,7 +158,7 @@ run_discovery_analysis <- function(sceptre_object, output_amount = 1, print_prog } -run_sceptre_analysis_high_level <- function(sceptre_object, response_grna_group_pairs, calibration_check, analysis_type, output_amount, print_progress, parallel) { +run_sceptre_analysis_high_level <- function(sceptre_object, response_grna_group_pairs, calibration_check, analysis_type, output_amount, print_progress, parallel, n_processors) { # if running permutations, generate the permutation idxs if (sceptre_object@run_permutations) { cat("Generating permutation resamples.") @@ -177,8 +186,8 @@ run_sceptre_analysis_high_level <- function(sceptre_object, response_grna_group_ n_nonzero_cntrl_thresh = sceptre_object@n_nonzero_cntrl_thresh, side_code = sceptre_object@side_code, low_moi = sceptre_object@low_moi, response_precomputations = sceptre_object@response_precomputations, - cells_in_use = sceptre_object@cells_in_use, - print_progress = print_progress, parallel = parallel, analysis_type = analysis_type) + cells_in_use = sceptre_object@cells_in_use, print_progress = print_progress, + parallel = parallel, n_processors = n_processors, analysis_type = analysis_type) # run the method out <- if (sceptre_object@run_permutations) { diff --git a/docs/reference/assign_grnas.html b/docs/reference/assign_grnas.html index d5b98c60..5ac80e0a 100644 --- a/docs/reference/assign_grnas.html +++ b/docs/reference/assign_grnas.html @@ -1,5 +1,5 @@ -Assign gRNAs to cells — assign_grnas • sceptreAssign gRNAs to cells — assign_grnas • sceptre @@ -92,6 +92,7 @@

Usage method = "default", print_progress = TRUE, parallel = FALSE, + n_processors = "auto", ... ) @@ -114,6 +115,10 @@

Argumentsparallel is set to TRUE. The default, "auto," automatically detects the number of processors available on the machine.

+ +
...

optional method-specific additional arguments

@@ -140,7 +145,7 @@

Examples -

Site built with pkgdown 2.0.5.

+

Site built with pkgdown 2.0.7.

diff --git a/docs/reference/import_data_from_parse.html b/docs/reference/import_data_from_parse.html index 2f930c62..c8bd1c63 100644 --- a/docs/reference/import_data_from_parse.html +++ b/docs/reference/import_data_from_parse.html @@ -1,5 +1,5 @@ -Import data from Parse (experimental) — import_data_from_parse • sceptreImport data from Parse (experimental) — import_data_from_parse • sceptre @@ -150,7 +150,7 @@

Details diff --git a/docs/reference/index.html b/docs/reference/index.html index 5a22caee..65c9ce92 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -1,5 +1,5 @@ -Function reference • sceptreFunction reference • sceptre @@ -295,7 +295,7 @@

Data

diff --git a/docs/reference/run_calibration_check.html b/docs/reference/run_calibration_check.html index 2f95c86d..5d96487f 100644 --- a/docs/reference/run_calibration_check.html +++ b/docs/reference/run_calibration_check.html @@ -1,5 +1,5 @@ -Run calibration check — run_calibration_check • sceptreRun calibration check — run_calibration_check • sceptre @@ -93,7 +93,8 @@

Usage n_calibration_pairs = NULL, calibration_group_size = NULL, print_progress = TRUE, - parallel = FALSE + parallel = FALSE, + n_processors = "auto" ) @@ -122,6 +123,10 @@

Argumentsparallel is set to TRUE. The default, "auto," automatically detects the number of processors available on the machine.

+

Value

@@ -145,7 +150,7 @@

Examples -

Site built with pkgdown 2.0.5.

+

Site built with pkgdown 2.0.7.

diff --git a/docs/reference/run_discovery_analysis.html b/docs/reference/run_discovery_analysis.html index 508b673e..4c4fa78c 100644 --- a/docs/reference/run_discovery_analysis.html +++ b/docs/reference/run_discovery_analysis.html @@ -1,5 +1,5 @@ -Run discovery analysis — run_discovery_analysis • sceptreRun discovery analysis — run_discovery_analysis • sceptre @@ -91,7 +91,8 @@

Usage sceptre_object, output_amount = 1, print_progress = TRUE, - parallel = FALSE + parallel = FALSE, + n_processors = "auto" ) @@ -112,6 +113,10 @@

Argumentsparallel is set to TRUE. The default, "auto," automatically detects the number of processors available on the machine.

+

Value

@@ -135,7 +140,7 @@

Examples -

Site built with pkgdown 2.0.5.

+

Site built with pkgdown 2.0.7.

diff --git a/docs/reference/run_power_check.html b/docs/reference/run_power_check.html index 9f9b810c..5c7e204b 100644 --- a/docs/reference/run_power_check.html +++ b/docs/reference/run_power_check.html @@ -1,5 +1,5 @@ -Run power check — run_power_check • sceptreRun power check — run_power_check • sceptre @@ -91,7 +91,8 @@

Usage sceptre_object, output_amount = 1, print_progress = TRUE, - parallel = FALSE + parallel = FALSE, + n_processors = "auto" ) @@ -112,6 +113,10 @@

Argumentsparallel is set to TRUE. The default, "auto," automatically detects the number of processors available on the machine.

+

Value

@@ -135,7 +140,7 @@

Examples -

Site built with pkgdown 2.0.5.

+

Site built with pkgdown 2.0.7.

diff --git a/man/run_calibration_check.Rd b/man/run_calibration_check.Rd index 9469bc41..ce15e6a5 100644 --- a/man/run_calibration_check.Rd +++ b/man/run_calibration_check.Rd @@ -10,7 +10,8 @@ run_calibration_check( n_calibration_pairs = NULL, calibration_group_size = NULL, print_progress = TRUE, - parallel = FALSE + parallel = FALSE, + n_processors = "auto" ) } \arguments{ @@ -25,6 +26,8 @@ run_calibration_check( \item{print_progress}{(optional; default \code{TRUE}) a logical indicating whether to print progress updates} \item{parallel}{(optional; default \code{FALSE}) a logical indicating whether to run the function in parallel} + +\item{n_processors}{(optional; default "auto") an integer specifying the number of processors to use if \code{parallel} is set to \code{TRUE}. The default, "auto," automatically detects the number of processors available on the machine.} } \value{ an updated \code{sceptre_object} in which the calibration check has been carried out diff --git a/man/run_discovery_analysis.Rd b/man/run_discovery_analysis.Rd index d9a5b865..c5e3ee17 100644 --- a/man/run_discovery_analysis.Rd +++ b/man/run_discovery_analysis.Rd @@ -8,7 +8,8 @@ run_discovery_analysis( sceptre_object, output_amount = 1, print_progress = TRUE, - parallel = FALSE + parallel = FALSE, + n_processors = "auto" ) } \arguments{ @@ -19,6 +20,8 @@ run_discovery_analysis( \item{print_progress}{(optional; default \code{TRUE}) a logical indicating whether to print progress updates} \item{parallel}{(optional; default \code{FALSE}) a logical indicating whether to run the function in parallel} + +\item{n_processors}{(optional; default "auto") an integer specifying the number of processors to use if \code{parallel} is set to \code{TRUE}. The default, "auto," automatically detects the number of processors available on the machine.} } \value{ an updated \code{sceptre_object} in which the discovery analysis has been carried out diff --git a/man/run_power_check.Rd b/man/run_power_check.Rd index c7300194..580dac18 100644 --- a/man/run_power_check.Rd +++ b/man/run_power_check.Rd @@ -8,7 +8,8 @@ run_power_check( sceptre_object, output_amount = 1, print_progress = TRUE, - parallel = FALSE + parallel = FALSE, + n_processors = "auto" ) } \arguments{ @@ -19,6 +20,8 @@ run_power_check( \item{print_progress}{(optional; default \code{TRUE}) a logical indicating whether to print progress updates} \item{parallel}{(optional; default \code{FALSE}) a logical indicating whether to run the function in parallel} + +\item{n_processors}{(optional; default "auto") an integer specifying the number of processors to use if \code{parallel} is set to \code{TRUE}. The default, "auto," automatically detects the number of processors available on the machine.} } \value{ an updated \code{sceptre_object} in which the power check has been carried out From 8e1022d2ce95a1f7be9a579b9d6803c582d20273 Mon Sep 17 00:00:00 2001 From: Tim Date: Mon, 6 Nov 2023 15:49:03 -0500 Subject: [PATCH 3/9] further accelerate plot_assign_grnas --- R/plotting_functions.R | 34 ++++++++++++++++------------------ R/s4_analysis_functs_1.R | 3 ++- R/sceptre.R | 4 ++-- man/sceptre.Rd | 4 ++-- 4 files changed, 22 insertions(+), 23 deletions(-) diff --git a/R/plotting_functions.R b/R/plotting_functions.R index e09d168f..40a98bbc 100644 --- a/R/plotting_functions.R +++ b/R/plotting_functions.R @@ -129,7 +129,6 @@ plot_grna_count_distributions <- function(sceptre_object, n_grnas_to_plot = 4L, #' @param grnas_to_plot (optional; default \code{NULL}) a character vector giving the names of specific gRNAs to plot; if \code{NULL}, then the gRNAs are chosen at random. #' @param point_size (optional; default \code{0.9}) the size of the individual points in the plot #' @param transparency (optional; default \code{0.8}) the transparency of the individual points in the plot -#' @param n_max_0_grna_unprtb_plot (optional; default \code{1000}) there may be many cells with a gRNA count of 0 in the unperturbed group. This can slow down the plotting without adding useful information, so at most \code{n_max_0_grna_unprtb_plot} points from this group are plotted. Setting this to \code{Inf} will guarantee no downsampling occurs. #' @param return_indiv_plots (optional; default \code{FALSE}) if \code{FALSE}, then a list of \code{ggplot} objects is returned; if \code{TRUE} then a single \code{cowplot} object is returned. #' #' @return a single \code{cowplot} object containing the combined panels (if \code{return_indiv_plots} is set to \code{TRUE}) or a list of the individual panels (if \code{return_indiv_plots} is set to \code{FALSE}) @@ -139,7 +138,9 @@ plot_grna_count_distributions <- function(sceptre_object, n_grnas_to_plot = 4L, #' # `plot_assign_grnas()` is dispatched when #' # `plot()` is called on the `sceptre_object` #' # in step 3 (the gRNA assignment step). -plot_assign_grnas <- function(sceptre_object, n_grnas_to_plot = 3L, grnas_to_plot = NULL, point_size = 0.9, transparency = 0.8, n_max_0_grna_unprtb_plot = 1000, return_indiv_plots = FALSE) { +plot_assign_grnas <- function(sceptre_object, n_grnas_to_plot = 3L, grnas_to_plot = NULL, point_size = 0.9, transparency = 0.8, return_indiv_plots = FALSE) { + n_points_to_plot_per_umi <- 1000 + n_grnas_to_plot_panel_b <- 1000 if (!sceptre_object@functs_called["assign_grnas"]) { stop("This `sceptre_object` has not yet had `assign_grnas` called on it.") } @@ -161,7 +162,7 @@ plot_assign_grnas <- function(sceptre_object, n_grnas_to_plot = 3L, grnas_to_plo p = grna_matrix@p, x = grna_matrix@x, row_idx = which(grna_id == rownames(grna_matrix)), - n_cells = ncol(grna_matrix)) + n_cells = ncol(grna_matrix)) |> as.integer() df <- data.frame(g = g, assignment = ifelse(assignment, "pert", "unpert") |> factor(), grna_id = grna_id |> factor(), @@ -171,14 +172,11 @@ plot_assign_grnas <- function(sceptre_object, n_grnas_to_plot = 3L, grnas_to_plo return(df) }) |> data.table::rbindlist() - # downsampling the unperturbed cells with 0 grna expression, if the number of those cells - # exceeds `n_max_0_grna_unprtb_plot`. - is_0_grna_and_unperturbed <- with(to_plot_a, assignment == "unperturbed" & g == 0) - if (sum(is_0_grna_and_unperturbed) > n_max_0_grna_unprtb_plot) { - idx_to_remove <- rownames(to_plot_a)[is_0_grna_and_unperturbed] |> - sample(sum(is_0_grna_and_unperturbed) - n_max_0_grna_unprtb_plot) - to_plot_a <- to_plot_a[! rownames(to_plot_a) %in% idx_to_remove, ] - } + # downsample the unperturbed cells + to_plot_a <- to_plot_a |> + dplyr::group_by(g) |> + dplyr::sample_n(size = min(n_points_to_plot_per_umi, dplyr::n())) |> + dplyr::ungroup() # plot a p_a <- ggplot2::ggplot(data = to_plot_a, mapping = ggplot2::aes(x = assignment, y = g, col = assignment)) + @@ -198,7 +196,8 @@ plot_assign_grnas <- function(sceptre_object, n_grnas_to_plot = 3L, grnas_to_plo to_plot_b <- data.frame(x = n_cells_per_grna, y = names(n_cells_per_grna)) |> dplyr::arrange(n_cells_per_grna) |> - dplyr::mutate(y = factor(y, labels = y, levels = y)) + dplyr::mutate(y = factor(y, labels = y, levels = y)) |> + dplyr::sample_n(min(n_grnas_to_plot_panel_b, dplyr::n())) p_b <- ggplot2::ggplot(data = to_plot_b, mapping = ggplot2::aes(x = x, y = y)) + ggplot2::geom_bar(stat = "identity", width = 0.5, fill = "grey90", col = "darkblue") + @@ -218,12 +217,11 @@ plot_assign_grnas <- function(sceptre_object, n_grnas_to_plot = 3L, grnas_to_plo } else { n_grnas_per_cell <- sceptre_object@grnas_per_cell moi <- mean(n_grnas_per_cell) - to_plot_c <- data.frame(x = n_grnas_per_cell) - p_c <- ggplot2::ggplot(data = to_plot_c, mapping = ggplot2::aes(x = x)) + - ggplot2::geom_histogram(binwidth = 1, fill = "grey90", color = "darkblue") + - ggplot2::scale_y_continuous(trans = "log1p", - expand = c(0, 0), - breaks = 10^(1:8)) + + p_c <- ggplot2::ggplot(data = data.frame(x = n_grnas_per_cell), + mapping = ggplot2::aes(x = x)) + + ggplot2::geom_histogram(binwidth = max(1, 0.02 * length(unique(n_grnas_per_cell))), + fill = "grey90", color = "darkblue") + + ggplot2::scale_y_continuous(expand = c(0, 0), trans = "log1p", breaks = 10^(1:8)) + get_my_theme() + ggplot2::ylab("Frequency") + ggplot2::ggtitle("N gRNAs per cell") + ggplot2::xlab("N gRNAs") + diff --git a/R/s4_analysis_functs_1.R b/R/s4_analysis_functs_1.R index 5f558304..8bd37865 100644 --- a/R/s4_analysis_functs_1.R +++ b/R/s4_analysis_functs_1.R @@ -23,8 +23,9 @@ import_data <- function(response_matrix, grna_matrix, grna_target_data_frame, mo extra_covariates = extra_covariates, response_names = if (identical(response_names, NA_character_)) rownames(response_matrix) else response_names) - # 3. make the response matrix row accessible + # 3. make the response and grna matrices row accessible response_matrix <- set_matrix_accessibility(response_matrix, make_row_accessible = TRUE) + grna_matrix <- set_matrix_accessibility(grna_matrix, make_row_accessible = TRUE) # 4. update fields in output object and return sceptre_object <- methods::new("sceptre_object") diff --git a/R/sceptre.R b/R/sceptre.R index 15149568..c51b16f2 100644 --- a/R/sceptre.R +++ b/R/sceptre.R @@ -47,7 +47,7 @@ utils::globalVariables(c("n_nonzero_trt", "n_nonzero_cntrl", "pair_str", "assign #' print(sceptre_object) #' #' # 4. run qc -#' plot_covariates(sceptre_object) +#' plot_covariates(sceptre_object, p_mito_threshold = 0.075) #' sceptre_object <- sceptre_object |> run_qc(p_mito_threshold = 0.075) #' plot(sceptre_object) #' print(sceptre_object) @@ -102,7 +102,7 @@ utils::globalVariables(c("n_nonzero_trt", "n_nonzero_cntrl", "pair_str", "assign #' print(sceptre_object) #' #' # 4. run qc -#' plot_covariates(sceptre_object) +#' plot_covariates(sceptre_object, p_mito_threshold = 0.075) #' sceptre_object <- sceptre_object |> run_qc(p_mito_threshold = 0.075) #' plot(sceptre_object) #' print(sceptre_object) diff --git a/man/sceptre.Rd b/man/sceptre.Rd index 2072eaaf..a23de1b7 100644 --- a/man/sceptre.Rd +++ b/man/sceptre.Rd @@ -41,7 +41,7 @@ plot(sceptre_object) print(sceptre_object) # 4. run qc -plot_covariates(sceptre_object) +plot_covariates(sceptre_object, p_mito_threshold = 0.075) sceptre_object <- sceptre_object |> run_qc(p_mito_threshold = 0.075) plot(sceptre_object) print(sceptre_object) @@ -96,7 +96,7 @@ plot(sceptre_object) print(sceptre_object) # 4. run qc -plot_covariates(sceptre_object) +plot_covariates(sceptre_object, p_mito_threshold = 0.075) sceptre_object <- sceptre_object |> run_qc(p_mito_threshold = 0.075) plot(sceptre_object) print(sceptre_object) From 60ac7b665875dce686bf4716bef324e23896b5b0 Mon Sep 17 00:00:00 2001 From: Tim Date: Mon, 6 Nov 2023 16:02:24 -0500 Subject: [PATCH 4/9] update test to reflect storage of grna matrix as dgr matrix --- man/plot_assign_grnas.Rd | 3 --- tests/testthat/test-s4_analysis_functs_1.R | 4 ++-- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/man/plot_assign_grnas.Rd b/man/plot_assign_grnas.Rd index 703c6e58..6680cd9d 100644 --- a/man/plot_assign_grnas.Rd +++ b/man/plot_assign_grnas.Rd @@ -10,7 +10,6 @@ plot_assign_grnas( grnas_to_plot = NULL, point_size = 0.9, transparency = 0.8, - n_max_0_grna_unprtb_plot = 1000, return_indiv_plots = FALSE ) } @@ -25,8 +24,6 @@ plot_assign_grnas( \item{transparency}{(optional; default \code{0.8}) the transparency of the individual points in the plot} -\item{n_max_0_grna_unprtb_plot}{(optional; default \code{1000}) there may be many cells with a gRNA count of 0 in the unperturbed group. This can slow down the plotting without adding useful information, so at most \code{n_max_0_grna_unprtb_plot} points from this group are plotted. Setting this to \code{Inf} will guarantee no downsampling occurs.} - \item{return_indiv_plots}{(optional; default \code{FALSE}) if \code{FALSE}, then a list of \code{ggplot} objects is returned; if \code{TRUE} then a single \code{cowplot} object is returned.} } \value{ diff --git a/tests/testthat/test-s4_analysis_functs_1.R b/tests/testthat/test-s4_analysis_functs_1.R index 4d8c8e98..3448daec 100644 --- a/tests/testthat/test-s4_analysis_functs_1.R +++ b/tests/testthat/test-s4_analysis_functs_1.R @@ -6,7 +6,7 @@ test_that("import_data", { num_responses <- 17 grna_target_data_frame <- make_mock_grna_target_data(c(1,3,1), 1, 1, 6) grna_matrix <- matrix(rpois(nrow(grna_target_data_frame) * num_cells, 1), ncol = num_cells) |> - `rownames<-`(grna_target_data_frame$grna_id) + `rownames<-`(grna_target_data_frame$grna_id) |> set_matrix_accessibility() response_matrix <- matrix(rpois(num_responses * num_cells, 1), ncol = num_cells) |> as("TsparseMatrix") extra_covariates <- data.frame(x = rep("aaa", num_cells)) @@ -50,7 +50,7 @@ test_that("import_data", { ##### slots set in section 4 of `import_data` expect_equal(sceptre_object_high_with_ec@response_matrix, set_matrix_accessibility(response_matrix)) - expect_equal(sceptre_object_high_with_ec@grna_matrix, grna_matrix) + expect_equal(sceptre_object_high_with_ec@grna_matrix, set_matrix_accessibility(grna_matrix)) expect_equal(sceptre_object_low_no_ec_with_response_names@response_names, paste0("rrr", 1:num_responses)) expect_true(sceptre_object_low_no_ec_with_response_names@low_moi) From 838f276fd578e0882b72a788ac1f2d9788357352 Mon Sep 17 00:00:00 2001 From: Tim Date: Mon, 6 Nov 2023 18:51:01 -0500 Subject: [PATCH 5/9] delete only the outputs of write_results in write_results --- R/s4_analysis_functs_2.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/s4_analysis_functs_2.R b/R/s4_analysis_functs_2.R index 8d80803b..b4721e46 100644 --- a/R/s4_analysis_functs_2.R +++ b/R/s4_analysis_functs_2.R @@ -280,8 +280,10 @@ get_result <- function(sceptre_object, analysis) { write_outputs_to_directory <- function(sceptre_object, directory) { # 0. create directory if (!dir.exists(directory)) dir.create(path = directory, recursive = TRUE) - fs <- list.files(path = directory, full.names = TRUE) - for (f in fs) file.remove(f) + fs <- paste0(directory, "/", c("analysis_summary.txt", "plot_assign_grnas.png", "plot_run_qc.png", + "plot_run_calibration_check.png", "plot_run_power_check.png", "plot_run_discovery_analysis.png", + "results_run_calibration_check.rds", "results_run_power_check.rds", "results_run_discovery_analysis.rds")) + for (f in fs) if (file.exists(f)) file.remove(f) # 1. create analysis_summary.txt file summary_file_fp <- paste0(directory, "/analysis_summary.txt") From 7ec6c9223a03c78c1ecd64d5a5f595e4e80c602e Mon Sep 17 00:00:00 2001 From: Tim Date: Tue, 7 Nov 2023 00:01:29 -0500 Subject: [PATCH 6/9] add log_dir arg to parallel functs --- R/assign_grna_functions.R | 5 +++-- R/aux_functions.R | 4 ++-- R/medium_level_functs_v2.R | 14 +++++++------- R/mixture_model_functs.R | 6 +++--- R/s4_analysis_functs_1.R | 5 +++-- R/s4_analysis_functs_2.R | 23 +++++++++++++++-------- 6 files changed, 33 insertions(+), 24 deletions(-) diff --git a/R/assign_grna_functions.R b/R/assign_grna_functions.R index af1dc635..2b073575 100644 --- a/R/assign_grna_functions.R +++ b/R/assign_grna_functions.R @@ -1,4 +1,4 @@ -assign_grnas_to_cells <- function(sceptre_object, print_progress, parallel, n_processors) { +assign_grnas_to_cells <- function(sceptre_object, print_progress, parallel, n_processors, log_dir) { # extract pieces from sceptre_object grna_matrix <- sceptre_object@grna_matrix grna_target_data_frame <- sceptre_object@grna_target_data_frame @@ -13,7 +13,8 @@ assign_grnas_to_cells <- function(sceptre_object, print_progress, parallel, n_pr if (grna_assignment_method == "mixture") { initial_assignment_list <- assign_grnas_to_cells_mixture(grna_matrix = grna_matrix, cell_covariate_data_frame = cell_covariate_data_frame, grna_assignment_hyperparameters = grna_assignment_hyperparameters, - print_progress = print_progress, parallel = parallel, n_processors = n_processors) + print_progress = print_progress, parallel = parallel, n_processors = n_processors, + log_dir = log_dir) } if (grna_assignment_method == "thresholding") { initial_assignment_list <- assign_grnas_to_cells_thresholding(grna_matrix = grna_matrix, diff --git a/R/aux_functions.R b/R/aux_functions.R index f5e7a1a8..2e0e5eb2 100644 --- a/R/aux_functions.R +++ b/R/aux_functions.R @@ -78,8 +78,8 @@ partition_response_ids <- function(response_ids, parallel, n_processors) { } -get_log_dir <- function() { - log_dir <- paste0(tempdir(), "/sceptre_logs/") +get_log_dir <- function(log_dir) { + log_dir <- paste0(log_dir, "/sceptre_logs/") if (!dir.exists(log_dir)) dir.create(log_dir, recursive = TRUE) return(log_dir) } diff --git a/R/medium_level_functs_v2.R b/R/medium_level_functs_v2.R index 1c311896..29e2c017 100644 --- a/R/medium_level_functs_v2.R +++ b/R/medium_level_functs_v2.R @@ -16,7 +16,7 @@ run_perm_test_in_memory <- function(response_matrix, grna_assignments, covariate synthetic_idxs, output_amount, fit_parametric_curve, B1, B2, B3, calibration_check, control_group_complement, n_nonzero_trt_thresh, n_nonzero_cntrl_thresh, side_code, low_moi, response_precomputations, cells_in_use, print_progress, - parallel, n_processors, analysis_type) { + parallel, n_processors, log_dir, analysis_type) { # 0. define several variables subset_to_nt_cells <- calibration_check && !control_group_complement run_outer_regression <- calibration_check || control_group_complement @@ -34,7 +34,7 @@ run_perm_test_in_memory <- function(response_matrix, grna_assignments, covariate # 2. define function to loop subset of response IDs analyze_given_response_ids <- function(curr_response_ids, proc_id = NULL) { if (parallel && print_progress) { - f_name <- paste0(get_log_dir(), analysis_type, "_", proc_id, ".out") + f_name <- paste0(get_log_dir(log_dir), analysis_type, "_", proc_id, ".out") file.create(f_name) |> invisible() } else { f_name <- NULL @@ -104,7 +104,7 @@ run_perm_test_in_memory <- function(response_matrix, grna_assignments, covariate res <- lapply(partitioned_response_ids, analyze_given_response_ids) } else { cat(paste0("Running ", analysis_type, " in parallel. ")) - if (print_progress) cat(paste0("Change directories to ", crayon::blue(get_log_dir()), " and view the files ", crayon::blue(paste0(analysis_type, "_*.out")), " for progress updates.\n")) + if (print_progress) cat(paste0("Change directories to ", crayon::blue(get_log_dir(log_dir)), " and view the files ", crayon::blue(paste0(analysis_type, "_*.out")), " for progress updates.\n")) res <- parallel::mclapply(seq_along(partitioned_response_ids), function(proc_id) analyze_given_response_ids(partitioned_response_ids[[proc_id]], proc_id), mc.cores = length(partitioned_response_ids)) @@ -124,7 +124,7 @@ run_perm_test_in_memory <- function(response_matrix, grna_assignments, covariate run_crt_in_memory_v2 <- function(response_matrix, grna_assignments, covariate_matrix, response_grna_group_pairs, output_amount, fit_parametric_curve, B1, B2, B3, calibration_check, control_group_complement, n_nonzero_trt_thresh, n_nonzero_cntrl_thresh, side_code, low_moi, response_precomputations, - cells_in_use, print_progress, parallel, n_processors, analysis_type) { + cells_in_use, print_progress, parallel, n_processors, log_dir, analysis_type) { # 0. define several variables subset_to_nt_cells <- calibration_check && !control_group_complement run_outer_regression <- calibration_check || control_group_complement @@ -142,7 +142,7 @@ run_crt_in_memory_v2 <- function(response_matrix, grna_assignments, covariate_ma # 2. define the run precomputation function run_precomp_on_given_responses <- function(curr_response_ids, proc_id = NULL) { if (parallel && print_progress) { - f_name <- paste0(get_log_dir(), analysis_type, "_", proc_id, ".out") + f_name <- paste0(get_log_dir(log_dir), analysis_type, "_", proc_id, ".out") file.create(f_name) |> invisible() } else { f_name <- NULL @@ -183,7 +183,7 @@ run_crt_in_memory_v2 <- function(response_matrix, grna_assignments, covariate_ma res <- lapply(partitioned_response_ids, run_precomp_on_given_responses) } else { cat(paste0("Running ", analysis_type, " in parallel. ")) - if (print_progress) cat(paste0("Change directories to ", crayon::blue(get_log_dir()), " and view the files ", crayon::blue(paste0(analysis_type, "_*.out")), " for progress updates. ")) + if (print_progress) cat(paste0("Change directories to ", crayon::blue(get_log_dir(log_dir)), " and view the files ", crayon::blue(paste0(analysis_type, "_*.out")), " for progress updates. ")) res <- parallel::mclapply(seq_along(partitioned_response_ids), function(proc_id) run_precomp_on_given_responses(partitioned_response_ids[[proc_id]], proc_id), mc.cores = length(partitioned_response_ids)) @@ -195,7 +195,7 @@ run_crt_in_memory_v2 <- function(response_matrix, grna_assignments, covariate_ma # 6. define the function to analyze the pairs perform_association_analysis <- function(curr_grna_groups, proc_id) { result_out_list <- vector(mode = "list", length = length(curr_grna_groups)) - f_name <- if (parallel && print_progress) paste0(get_log_dir(), analysis_type, "_", proc_id, ".out") else NULL + f_name <- if (parallel && print_progress) paste0(get_log_dir(log_dir), analysis_type, "_", proc_id, ".out") else NULL for (grna_group_idx in seq_along(curr_grna_groups)) { curr_grna_group <- get_id_from_idx(grna_group_idx, print_progress, curr_grna_groups, diff --git a/R/mixture_model_functs.R b/R/mixture_model_functs.R index 046aa501..c20dc6a1 100644 --- a/R/mixture_model_functs.R +++ b/R/mixture_model_functs.R @@ -1,4 +1,4 @@ -assign_grnas_to_cells_mixture <- function(grna_matrix, cell_covariate_data_frame, grna_assignment_hyperparameters, print_progress, parallel, n_processors) { +assign_grnas_to_cells_mixture <- function(grna_matrix, cell_covariate_data_frame, grna_assignment_hyperparameters, print_progress, parallel, n_processors, log_dir) { if (!parallel) cat(crayon::red("Note: Set `parallel = TRUE` in the function call to improve speed.\n\n")) # 0. get random starting guesses for pi and g_pert starting_guesses <- get_random_starting_guesses(n_em_rep = grna_assignment_hyperparameters$n_em_rep, @@ -16,7 +16,7 @@ assign_grnas_to_cells_mixture <- function(grna_matrix, cell_covariate_data_frame # 3. define the function to perform assignments for a set of grnas analyze_given_grna_ids <- function(curr_grna_ids, proc_id = NULL) { if (parallel && print_progress) { - f_name <- paste0(get_log_dir(), "assign_grnas_", proc_id, ".out") + f_name <- paste0(get_log_dir(log_dir), "assign_grnas_", proc_id, ".out") file.create(f_name) |> invisible() } else { f_name <- NULL @@ -42,7 +42,7 @@ assign_grnas_to_cells_mixture <- function(grna_matrix, cell_covariate_data_frame } else { cat("Running gRNA assignments in parallel. ") if (print_progress) { - cat(paste0("Change directories to ", crayon::blue(get_log_dir()), " and view the files ", + cat(paste0("Change directories to ", crayon::blue(get_log_dir(log_dir)), " and view the files ", crayon::blue("assign_grnas_*.out"), " for progress updates.\n")) } grna_ids_partitioned <- partition_response_ids(grna_ids, parallel, n_processors) diff --git a/R/s4_analysis_functs_1.R b/R/s4_analysis_functs_1.R index 8bd37865..e6dc7ba1 100644 --- a/R/s4_analysis_functs_1.R +++ b/R/s4_analysis_functs_1.R @@ -162,7 +162,8 @@ set_analysis_parameters <- function(sceptre_object, #' @export #' @examples #' # see example via ?sceptre -assign_grnas <- function(sceptre_object, method = "default", print_progress = TRUE, parallel = FALSE, n_processors = "auto", ...) { +assign_grnas <- function(sceptre_object, method = "default", print_progress = TRUE, parallel = FALSE, + n_processors = "auto", log_dir = tempdir(), ...) { # 0. verify that function called in correct order sceptre_object <- perform_status_check_and_update(sceptre_object, "assign_grnas") @@ -200,7 +201,7 @@ assign_grnas <- function(sceptre_object, method = "default", print_progress = TR sceptre_object@grna_assignment_hyperparameters <- hyperparameters # 5. assign the grnas - sceptre_object <- assign_grnas_to_cells(sceptre_object, print_progress, parallel, n_processors) + sceptre_object <- assign_grnas_to_cells(sceptre_object, print_progress, parallel, n_processors, log_dir) # return return(sceptre_object) diff --git a/R/s4_analysis_functs_2.R b/R/s4_analysis_functs_2.R index b4721e46..73c47251 100644 --- a/R/s4_analysis_functs_2.R +++ b/R/s4_analysis_functs_2.R @@ -16,7 +16,7 @@ #' # see example via ?sceptre run_calibration_check <- function(sceptre_object, output_amount = 1, n_calibration_pairs = NULL, calibration_group_size = NULL, print_progress = TRUE, parallel = FALSE, - n_processors = "auto") { + n_processors = "auto", log_dir = tempdir()) { # 0. advance function (if necessary), and check function call sceptre_object <- skip_assign_grnas_and_run_qc(sceptre_object, parallel) sceptre_object <- perform_status_check_and_update(sceptre_object, "run_calibration_check") @@ -46,7 +46,8 @@ run_calibration_check <- function(sceptre_object, output_amount = 1, n_calibrati output_amount = output_amount, print_progress = print_progress, parallel = parallel, - n_processors = n_processors) + n_processors = n_processors, + log_dir = log_dir) # 6. update fields of sceptre object with results sceptre_object@calibration_result <- out$result |> apply_grouping_to_result(sceptre_object, TRUE) |> @@ -72,7 +73,8 @@ run_calibration_check <- function(sceptre_object, output_amount = 1, n_calibrati #' @export #' @examples #' # see example via ?sceptre -run_power_check <- function(sceptre_object, output_amount = 1, print_progress = TRUE, parallel = FALSE, n_processors = "auto") { +run_power_check <- function(sceptre_object, output_amount = 1, print_progress = TRUE, parallel = FALSE, + n_processors = "auto", log_dir = tempdir()) { # 0. verify that function called in correct order sceptre_object <- skip_assign_grnas_and_run_qc(sceptre_object, parallel) sceptre_object <- perform_status_check_and_update(sceptre_object, "run_power_check") @@ -98,7 +100,8 @@ run_power_check <- function(sceptre_object, output_amount = 1, print_progress = output_amount = output_amount, print_progress = print_progress, parallel = parallel, - n_processors = n_processors) + n_processors = n_processors, + log_dir = log_dir) # 4. update fields of sceptre object with results sceptre_object@power_result <- out$result |> apply_grouping_to_result(sceptre_object) @@ -121,7 +124,8 @@ run_power_check <- function(sceptre_object, output_amount = 1, print_progress = #' @export #' @examples #' # see example via ?sceptre -run_discovery_analysis <- function(sceptre_object, output_amount = 1, print_progress = TRUE, parallel = FALSE, n_processors = "auto") { +run_discovery_analysis <- function(sceptre_object, output_amount = 1, print_progress = TRUE, parallel = FALSE, + n_processors = "auto", log_dir = tempdir()) { # 0. verify that function called in correct order sceptre_object <- skip_assign_grnas_and_run_qc(sceptre_object, parallel) sceptre_object <- perform_status_check_and_update(sceptre_object, "run_discovery_analysis") @@ -147,7 +151,8 @@ run_discovery_analysis <- function(sceptre_object, output_amount = 1, print_prog output_amount = output_amount, print_progress = print_progress, parallel = parallel, - n_processors = n_processors) + n_processors = n_processors, + log_dir = log_dir) # 4. update fields of sceptre object with results sceptre_object@discovery_result <- out$result |> apply_grouping_to_result(sceptre_object) |> @@ -158,7 +163,8 @@ run_discovery_analysis <- function(sceptre_object, output_amount = 1, print_prog } -run_sceptre_analysis_high_level <- function(sceptre_object, response_grna_group_pairs, calibration_check, analysis_type, output_amount, print_progress, parallel, n_processors) { +run_sceptre_analysis_high_level <- function(sceptre_object, response_grna_group_pairs, calibration_check, analysis_type, output_amount, + print_progress, parallel, n_processors, log_dir) { # if running permutations, generate the permutation idxs if (sceptre_object@run_permutations) { cat("Generating permutation resamples.") @@ -187,7 +193,8 @@ run_sceptre_analysis_high_level <- function(sceptre_object, response_grna_group_ side_code = sceptre_object@side_code, low_moi = sceptre_object@low_moi, response_precomputations = sceptre_object@response_precomputations, cells_in_use = sceptre_object@cells_in_use, print_progress = print_progress, - parallel = parallel, n_processors = n_processors, analysis_type = analysis_type) + parallel = parallel, n_processors = n_processors, log_dir = log_dir, + analysis_type = analysis_type) # run the method out <- if (sceptre_object@run_permutations) { From d830d93ac1e46402a52cc11e46e69e10a3141f60 Mon Sep 17 00:00:00 2001 From: Tim Date: Sun, 19 Nov 2023 22:47:08 -0500 Subject: [PATCH 7/9] update docs --- man/assign_grnas.Rd | 1 + man/run_calibration_check.Rd | 3 ++- man/run_discovery_analysis.Rd | 3 ++- man/run_power_check.Rd | 3 ++- 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/man/assign_grnas.Rd b/man/assign_grnas.Rd index 72d627eb..797d3421 100644 --- a/man/assign_grnas.Rd +++ b/man/assign_grnas.Rd @@ -10,6 +10,7 @@ assign_grnas( print_progress = TRUE, parallel = FALSE, n_processors = "auto", + log_dir = tempdir(), ... ) } diff --git a/man/run_calibration_check.Rd b/man/run_calibration_check.Rd index ce15e6a5..2efa8348 100644 --- a/man/run_calibration_check.Rd +++ b/man/run_calibration_check.Rd @@ -11,7 +11,8 @@ run_calibration_check( calibration_group_size = NULL, print_progress = TRUE, parallel = FALSE, - n_processors = "auto" + n_processors = "auto", + log_dir = tempdir() ) } \arguments{ diff --git a/man/run_discovery_analysis.Rd b/man/run_discovery_analysis.Rd index c5e3ee17..e1b7a6fc 100644 --- a/man/run_discovery_analysis.Rd +++ b/man/run_discovery_analysis.Rd @@ -9,7 +9,8 @@ run_discovery_analysis( output_amount = 1, print_progress = TRUE, parallel = FALSE, - n_processors = "auto" + n_processors = "auto", + log_dir = tempdir() ) } \arguments{ diff --git a/man/run_power_check.Rd b/man/run_power_check.Rd index 580dac18..eb4d6126 100644 --- a/man/run_power_check.Rd +++ b/man/run_power_check.Rd @@ -9,7 +9,8 @@ run_power_check( output_amount = 1, print_progress = TRUE, parallel = FALSE, - n_processors = "auto" + n_processors = "auto", + log_dir = tempdir() ) } \arguments{ From b51b980e7f92b4c1d4a158dd8b4ffd84f44b5a37 Mon Sep 17 00:00:00 2001 From: Tim Barry Date: Fri, 1 Dec 2023 16:02:55 -0500 Subject: [PATCH 8/9] update comments in pairwise_qc.cpp --- src/pairwise_qc.cpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/pairwise_qc.cpp b/src/pairwise_qc.cpp index 4aca9e07..b776cdf8 100644 --- a/src/pairwise_qc.cpp +++ b/src/pairwise_qc.cpp @@ -4,7 +4,7 @@ using namespace Rcpp; // this function outputs four pieces: // 1. N nonzero mat; this is the number of nonzero cells for each gene-NT gRNA pair (same regardless of control group) -// 2. n_nonzero_tot; this is the vector giving the number of nonzero cells per gene; when using the NT cells as the complement, we restrict our attention to the NT cells; when using the complement set as the control group, by contrast, we sum over all cells. +// 2. n_nonzero_tot; this is the vector giving the number of nonzero cells per gene; when using the NT cells as the control group, we restrict our attention to the NT cells; when using the complement set as the control group, by contrast, we sum over all cells. // 3,4. n nonzero trt, n nonzero cntrl vectors // [[Rcpp::export]] List compute_nt_nonzero_matrix_and_n_ok_pairs_v3(IntegerVector j, IntegerVector p, int n_cells_orig, int n_cells_sub, @@ -25,42 +25,42 @@ List compute_nt_nonzero_matrix_and_n_ok_pairs_v3(IntegerVector j, IntegerVector for (int i = 0; i < cells_in_use_zero_idx.size(); i ++) cells_in_use_zero_idx[i] = cells_in_use[i] - 1; // 1. iterate over genes - for (int column_idx = 0; column_idx < n_genes; column_idx++) { + for (int row_idx = 0; row_idx < n_genes; row_idx++) { // load nonzero positions into the boolean vector y_sub - load_nonzero_posits(j, p, column_idx, y_orig, y_sub, cells_in_use_zero_idx); - // 1.2 iterate over nt grnas, adding n nonzero trt to M matrix + load_nonzero_posits(j, p, row_idx, y_orig, y_sub, cells_in_use_zero_idx); + // iterate over nt grnas, adding n nonzero trt to M matrix for (int grna_idx = 0; grna_idx < n_nt_grnas; grna_idx ++) { n_nonzero = 0; curr_idxs = indiv_nt_grna_idxs[grna_idx]; for (int k = 0; k < curr_idxs.size(); k ++) { if (!control_group_complement) { // NT cells control group if (y_sub[all_nt_idxs[curr_idxs[k] - 1] - 1]) n_nonzero ++; // redirection - } else { // discovery cells control group + } else { // complement set control group if (y_sub[curr_idxs[k] - 1]) n_nonzero ++; // no redirection } } - M(grna_idx, column_idx) = n_nonzero; + M(grna_idx, row_idx) = n_nonzero; } // 1.2 update n_nonzero_tot for this gene - n_nonzero_tot[column_idx] = 0; + n_nonzero_tot[row_idx] = 0; if (!control_group_complement) { - for (int k = 0; k < n_nt_grnas; k ++) n_nonzero_tot[column_idx] += M(k, column_idx); + for (int k = 0; k < n_nt_grnas; k ++) n_nonzero_tot[row_idx] += M(k, row_idx); } else { - for (int k = 0; k < y_sub.size(); k++) if (y_sub[k]) n_nonzero_tot[column_idx] ++; + for (int k = 0; k < y_sub.size(); k++) if (y_sub[k]) n_nonzero_tot[row_idx] ++; } // iterate through the discovery pairs containing this gene - while (to_analyze_response_idxs[pair_pointer] - 1 == column_idx) { + while (to_analyze_response_idxs[pair_pointer] - 1 == row_idx) { curr_idxs = grna_group_idxs[to_analyze_grna_idxs[pair_pointer] - 1]; curr_n_nonzero_trt = 0; // first, get n nonzero trt for (int k = 0; k < curr_idxs.size(); k ++) if (y_sub[curr_idxs[k] - 1]) curr_n_nonzero_trt ++; // second, get n nonzero cntrl if (!control_group_complement) { // NT cells - curr_n_nonzero_cntrl = n_nonzero_tot[column_idx]; + curr_n_nonzero_cntrl = n_nonzero_tot[row_idx]; } else { // complement set - curr_n_nonzero_cntrl = n_nonzero_tot[column_idx] - curr_n_nonzero_trt; + curr_n_nonzero_cntrl = n_nonzero_tot[row_idx] - curr_n_nonzero_trt; } n_nonzero_trt[pair_pointer] = curr_n_nonzero_trt; n_nonzero_cntrl[pair_pointer] = curr_n_nonzero_cntrl; From 89d7555832d41aa4abfdc0a1061d0c7ce6f60fdc Mon Sep 17 00:00:00 2001 From: Tim Barry Date: Fri, 8 Dec 2023 13:37:56 -0500 Subject: [PATCH 9/9] fix bug: mt- and MT- are now valid prefixes for mitochondrial genes --- R/preprocessing_functions.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/preprocessing_functions.R b/R/preprocessing_functions.R index 775ed6c9..51c36b5f 100644 --- a/R/preprocessing_functions.R +++ b/R/preprocessing_functions.R @@ -101,7 +101,7 @@ compute_cell_covariates <- function(matrix_in, feature_names, compute_p_mito) { matrix_in <- set_matrix_accessibility(matrix_in, make_row_accessible = FALSE) # get MT gene idxs if (compute_p_mito) { - mt_gene_idxs <- grep(pattern = "^MT-", x = feature_names) + mt_gene_idxs <- grep(pattern = "^MT-", x = feature_names, ignore.case = TRUE) compute_p_mito <- length(mt_gene_idxs) >= 1 } else { mt_gene_idxs <- integer()