From e28718942ea114c1f84cf946d0f13f558ef6f92b Mon Sep 17 00:00:00 2001
From: Tim
Date: Sun, 5 Nov 2023 15:11:32 -0500
Subject: [PATCH 1/9] add optional n_processors arg to assign_grnas
---
R/assign_grna_functions.R | 4 ++--
R/aux_functions.R | 8 ++++----
R/check_functions.R | 7 ++++++-
R/mixture_model_functs.R | 4 ++--
R/s4_analysis_functs_1.R | 7 ++++---
man/assign_grnas.Rd | 3 +++
6 files changed, 21 insertions(+), 12 deletions(-)
diff --git a/R/assign_grna_functions.R b/R/assign_grna_functions.R
index 7f941072..af1dc635 100644
--- a/R/assign_grna_functions.R
+++ b/R/assign_grna_functions.R
@@ -1,4 +1,4 @@
-assign_grnas_to_cells <- function(sceptre_object, print_progress, parallel) {
+assign_grnas_to_cells <- function(sceptre_object, print_progress, parallel, n_processors) {
# extract pieces from sceptre_object
grna_matrix <- sceptre_object@grna_matrix
grna_target_data_frame <- sceptre_object@grna_target_data_frame
@@ -13,7 +13,7 @@ assign_grnas_to_cells <- function(sceptre_object, print_progress, parallel) {
if (grna_assignment_method == "mixture") {
initial_assignment_list <- assign_grnas_to_cells_mixture(grna_matrix = grna_matrix, cell_covariate_data_frame = cell_covariate_data_frame,
grna_assignment_hyperparameters = grna_assignment_hyperparameters,
- print_progress = print_progress, parallel = parallel)
+ print_progress = print_progress, parallel = parallel, n_processors = n_processors)
}
if (grna_assignment_method == "thresholding") {
initial_assignment_list <- assign_grnas_to_cells_thresholding(grna_matrix = grna_matrix,
diff --git a/R/aux_functions.R b/R/aux_functions.R
index a0af87a0..f5e7a1a8 100644
--- a/R/aux_functions.R
+++ b/R/aux_functions.R
@@ -62,14 +62,14 @@ auto_compute_cell_covariates <- function(response_matrix, grna_matrix, extra_cov
}
-partition_response_ids <- function(response_ids, parallel) {
+partition_response_ids <- function(response_ids, parallel, n_processors) {
groups_set <- FALSE
if (parallel) {
- n_cores <- parallel::detectCores(logical = FALSE)
- if (length(response_ids) >= n_cores) {
+ if (identical(n_processors, "auto")) n_processors <- parallel::detectCores(logical = FALSE)
+ if (length(response_ids) >= 2 * n_processors) {
set.seed(4)
s <- sample(response_ids)
- out <- split(s, cut(seq_along(s), n_cores, labels = paste0("group_", seq(1, n_cores))))
+ out <- split(s, cut(seq_along(s), n_processors, labels = paste0("group_", seq(1, n_processors))))
groups_set <- TRUE
}
}
diff --git a/R/check_functions.R b/R/check_functions.R
index 4e88c38f..02fc3bd0 100644
--- a/R/check_functions.R
+++ b/R/check_functions.R
@@ -186,7 +186,7 @@ check_set_analysis_parameters <- function(sceptre_object, formula_object, respon
}
-check_assign_grna_inputs <- function(sceptre_object, assignment_method, hyperparameters) {
+check_assign_grna_inputs <- function(sceptre_object, assignment_method, hyperparameters, n_processors) {
if (!(assignment_method %in% c("maximum", "mixture", "thresholding"))) {
stop("`assignment_method` must be `mixture`, `maximum`, or `thresholding`.")
}
@@ -244,6 +244,11 @@ check_assign_grna_inputs <- function(sceptre_object, assignment_method, hyperpar
stop("`probability_threshold` should be a numeric in the interval (0,1).")
}
}
+
+ # 3. check n_processors argument
+ if (!(identical(n_processors, "auto") || (is.numeric(n_processors) && n_processors >= 2))) {
+ stop("`n_processors` should be set to the string 'auto' or an integer greater than or equal to 2.")
+ }
return(NULL)
}
diff --git a/R/mixture_model_functs.R b/R/mixture_model_functs.R
index 674e80b8..046aa501 100644
--- a/R/mixture_model_functs.R
+++ b/R/mixture_model_functs.R
@@ -1,4 +1,4 @@
-assign_grnas_to_cells_mixture <- function(grna_matrix, cell_covariate_data_frame, grna_assignment_hyperparameters, print_progress, parallel) {
+assign_grnas_to_cells_mixture <- function(grna_matrix, cell_covariate_data_frame, grna_assignment_hyperparameters, print_progress, parallel, n_processors) {
if (!parallel) cat(crayon::red("Note: Set `parallel = TRUE` in the function call to improve speed.\n\n"))
# 0. get random starting guesses for pi and g_pert
starting_guesses <- get_random_starting_guesses(n_em_rep = grna_assignment_hyperparameters$n_em_rep,
@@ -45,7 +45,7 @@ assign_grnas_to_cells_mixture <- function(grna_matrix, cell_covariate_data_frame
cat(paste0("Change directories to ", crayon::blue(get_log_dir()), " and view the files ",
crayon::blue("assign_grnas_*.out"), " for progress updates.\n"))
}
- grna_ids_partitioned <- partition_response_ids(grna_ids, parallel)
+ grna_ids_partitioned <- partition_response_ids(grna_ids, parallel, n_processors)
res <- parallel::mclapply(seq_along(grna_ids_partitioned),
function(proc_id) analyze_given_grna_ids(grna_ids_partitioned[[proc_id]], proc_id),
mc.cores = length(grna_ids_partitioned))
diff --git a/R/s4_analysis_functs_1.R b/R/s4_analysis_functs_1.R
index bef159ad..5f558304 100644
--- a/R/s4_analysis_functs_1.R
+++ b/R/s4_analysis_functs_1.R
@@ -154,13 +154,14 @@ set_analysis_parameters <- function(sceptre_object,
#' @param method (optional) a string indicating the method to use to assign the gRNAs to cells, one of `"mixture"`, `"thresholding"`, or `"maximum"`
#' @param print_progress (optional; default `TRUE`) a logical indicating whether to print progress updates
#' @param parallel (optional; default `FALSE`) a logical indicating whether to run the function in parallel
+#' @param n_processors (optional; default "auto") an integer specifying the number of processors to use if `parallel` is set to `TRUE`. The default, "auto," automatically detects the number of processors available on the machine.
#' @param ... optional method-specific additional arguments
#'
#' @return an updated `sceptre_object` in which the gRNA assignments have been carried out
#' @export
#' @examples
#' # see example via ?sceptre
-assign_grnas <- function(sceptre_object, method = "default", print_progress = TRUE, parallel = FALSE, ...) {
+assign_grnas <- function(sceptre_object, method = "default", print_progress = TRUE, parallel = FALSE, n_processors = "auto", ...) {
# 0. verify that function called in correct order
sceptre_object <- perform_status_check_and_update(sceptre_object, "assign_grnas")
@@ -185,7 +186,7 @@ assign_grnas <- function(sceptre_object, method = "default", print_progress = TR
hyperparameters <- hyperparameters_default
# 2. check inputs
- check_assign_grna_inputs(sceptre_object, method, hyperparameters) |> invisible()
+ check_assign_grna_inputs(sceptre_object, method, hyperparameters, n_processors) |> invisible()
# 3. determine whether to reset response precomputations
reset_response_precomps <- sceptre_object@low_moi &&
@@ -198,7 +199,7 @@ assign_grnas <- function(sceptre_object, method = "default", print_progress = TR
sceptre_object@grna_assignment_hyperparameters <- hyperparameters
# 5. assign the grnas
- sceptre_object <- assign_grnas_to_cells(sceptre_object, print_progress, parallel)
+ sceptre_object <- assign_grnas_to_cells(sceptre_object, print_progress, parallel, n_processors)
# return
return(sceptre_object)
diff --git a/man/assign_grnas.Rd b/man/assign_grnas.Rd
index dfcccfbb..72d627eb 100644
--- a/man/assign_grnas.Rd
+++ b/man/assign_grnas.Rd
@@ -9,6 +9,7 @@ assign_grnas(
method = "default",
print_progress = TRUE,
parallel = FALSE,
+ n_processors = "auto",
...
)
}
@@ -21,6 +22,8 @@ assign_grnas(
\item{parallel}{(optional; default \code{FALSE}) a logical indicating whether to run the function in parallel}
+\item{n_processors}{(optional; default "auto") an integer specifying the number of processors to use if \code{parallel} is set to \code{TRUE}. The default, "auto," automatically detects the number of processors available on the machine.}
+
\item{...}{optional method-specific additional arguments}
}
\value{
From 093b2d5b1a370608e3f878a0fe0288a62db2c36e Mon Sep 17 00:00:00 2001
From: Tim
Date: Mon, 6 Nov 2023 11:27:50 -0500
Subject: [PATCH 2/9] add n_processors argument to de_functions
---
R/check_functions.R | 18 +++++++++---
R/medium_level_functs_v2.R | 15 +++++-----
R/s4_analysis_functs_2.R | 33 ++++++++++++++--------
docs/reference/assign_grnas.html | 9 ++++--
docs/reference/import_data_from_parse.html | 4 +--
docs/reference/index.html | 4 +--
docs/reference/run_calibration_check.html | 11 ++++++--
docs/reference/run_discovery_analysis.html | 11 ++++++--
docs/reference/run_power_check.html | 11 ++++++--
man/run_calibration_check.Rd | 5 +++-
man/run_discovery_analysis.Rd | 5 +++-
man/run_power_check.Rd | 5 +++-
12 files changed, 90 insertions(+), 41 deletions(-)
diff --git a/R/check_functions.R b/R/check_functions.R
index 02fc3bd0..c9478540 100644
--- a/R/check_functions.R
+++ b/R/check_functions.R
@@ -269,20 +269,25 @@ check_run_qc_inputs <- function(n_nonzero_trt_thresh, n_nonzero_cntrl_thresh, re
}
-check_calibration_check_inputs <- function(sceptre_object, n_calibration_pairs) {
+check_calibration_check_inputs <- function(sceptre_object, n_calibration_pairs, n_processors) {
grna_target_data_frame <- sceptre_object@grna_target_data_frame
control_group_complement <- sceptre_object@control_group_complement
n_nt_grnas <- grna_target_data_frame |>
dplyr::filter(grna_group == "non-targeting") |> nrow()
+ # 1. check number of gRNAS
if (!control_group_complement) {
if (n_nt_grnas < 2) stop("Two or more non-targeting gRNAs must be present to run the calibration check when using the NT cells as the control group. gRNAs that are non-targeting should be assigned a gRNA group label of 'non-targeting' in the `grna_target_data_frame`.")
} else {
if (n_nt_grnas < 1) stop("At least one non-targeting gRNA must be present to run the calibration check. gRNAs that are non-targeting should be assigned a gRNA group label of 'non-targeting' in the `grna_target_data_frame`.")
}
-
+ # 2. check number of calibration pairs
if (n_calibration_pairs == 0L) {
stop("Cannot run a calibration check on zero negative control pairs.")
}
+ # 3. check n_processors
+ if (!(identical(n_processors, "auto") || (is.numeric(n_processors) && n_processors >= 2))) {
+ stop("`n_processors` should be set to the string 'auto' or an integer greater than or equal to 2.")
+ }
return(NULL)
}
@@ -291,8 +296,8 @@ check_discovery_analysis_inputs <- function(response_grna_group_pairs,
control_group_complement,
grna_target_data_frame,
calibration_check_run,
- pc_analysis,
- calibration_result, n_ok_pairs) {
+ pc_analysis, calibration_result,
+ n_ok_pairs, n_processors) {
# 1. check that positive control pairs are available
if (nrow(response_grna_group_pairs) == 0L) {
stop(paste0(ifelse(pc_analysis, "Positive control", "Discovery"), " pairs have not been supplied. Thus, the ", ifelse(pc_analysis, "power check", "discovery analysis"), " cannot be run. You can supply ", ifelse(pc_analysis, "positive control", "discovery"), " pairs in the function set_analysis_parameters()."))
@@ -316,5 +321,10 @@ check_discovery_analysis_inputs <- function(response_grna_group_pairs,
stop("Zero pairs pass pairwise QC. Cannot run analysis.")
}
+ # 5. check n_processors
+ if (!(identical(n_processors, "auto") || (is.numeric(n_processors) && n_processors >= 2))) {
+ stop("`n_processors` should be set to the string 'auto' or an integer greater than or equal to 2.")
+ }
+
return(NULL)
}
diff --git a/R/medium_level_functs_v2.R b/R/medium_level_functs_v2.R
index 0f39742f..1c311896 100644
--- a/R/medium_level_functs_v2.R
+++ b/R/medium_level_functs_v2.R
@@ -16,7 +16,7 @@ run_perm_test_in_memory <- function(response_matrix, grna_assignments, covariate
synthetic_idxs, output_amount, fit_parametric_curve, B1, B2, B3, calibration_check,
control_group_complement, n_nonzero_trt_thresh, n_nonzero_cntrl_thresh,
side_code, low_moi, response_precomputations, cells_in_use, print_progress,
- parallel, analysis_type) {
+ parallel, n_processors, analysis_type) {
# 0. define several variables
subset_to_nt_cells <- calibration_check && !control_group_complement
run_outer_regression <- calibration_check || control_group_complement
@@ -96,7 +96,8 @@ run_perm_test_in_memory <- function(response_matrix, grna_assignments, covariate
}
# 3. partition the response IDs
- partitioned_response_ids <- partition_response_ids(response_ids = response_ids, parallel = parallel)
+ partitioned_response_ids <- partition_response_ids(response_ids = response_ids,
+ parallel = parallel, n_processors = n_processors)
# 4. run the analysis
if (!parallel) {
@@ -121,9 +122,9 @@ run_perm_test_in_memory <- function(response_matrix, grna_assignments, covariate
# core function 2: run crt in memory
run_crt_in_memory_v2 <- function(response_matrix, grna_assignments, covariate_matrix, response_grna_group_pairs,
- output_amount, fit_parametric_curve, B1, B2, B3, calibration_check,
- control_group_complement, n_nonzero_trt_thresh, n_nonzero_cntrl_thresh,
- side_code, low_moi, response_precomputations, cells_in_use, print_progress, parallel, analysis_type) {
+ output_amount, fit_parametric_curve, B1, B2, B3, calibration_check, control_group_complement,
+ n_nonzero_trt_thresh, n_nonzero_cntrl_thresh, side_code, low_moi, response_precomputations,
+ cells_in_use, print_progress, parallel, n_processors, analysis_type) {
# 0. define several variables
subset_to_nt_cells <- calibration_check && !control_group_complement
run_outer_regression <- calibration_check || control_group_complement
@@ -177,7 +178,7 @@ run_crt_in_memory_v2 <- function(response_matrix, grna_assignments, covariate_ma
# 5. update the response precomputations (if applicable)
if (run_outer_regression) {
- partitioned_response_ids <- partition_response_ids(response_ids = response_ids, parallel = parallel)
+ partitioned_response_ids <- partition_response_ids(response_ids = response_ids, parallel = parallel, n_processors = n_processors)
if (!parallel) {
res <- lapply(partitioned_response_ids, run_precomp_on_given_responses)
} else {
@@ -224,7 +225,7 @@ run_crt_in_memory_v2 <- function(response_matrix, grna_assignments, covariate_ma
# 10. perform the association analyses
grna_groups <- unique(response_grna_group_pairs$grna_group)
- partitioned_grna_group_ids <- partition_response_ids(response_ids = grna_groups, parallel = parallel)
+ partitioned_grna_group_ids <- partition_response_ids(response_ids = grna_groups, parallel = parallel, n_processors = n_processors)
if (!parallel) {
res <- lapply(partitioned_grna_group_ids, perform_association_analysis)
} else {
diff --git a/R/s4_analysis_functs_2.R b/R/s4_analysis_functs_2.R
index 6bed4695..8d80803b 100644
--- a/R/s4_analysis_functs_2.R
+++ b/R/s4_analysis_functs_2.R
@@ -8,13 +8,15 @@
#' @param calibration_group_size (optional) the number of negative control gRNAs to put into each negative control target
#' @param print_progress (optional; default `TRUE`) a logical indicating whether to print progress updates
#' @param parallel (optional; default `FALSE`) a logical indicating whether to run the function in parallel
+#' @param n_processors (optional; default "auto") an integer specifying the number of processors to use if `parallel` is set to `TRUE`. The default, "auto," automatically detects the number of processors available on the machine.
#' @return an updated `sceptre_object` in which the calibration check has been carried out
#'
#' @export
#' @examples
#' # see example via ?sceptre
run_calibration_check <- function(sceptre_object, output_amount = 1, n_calibration_pairs = NULL,
- calibration_group_size = NULL, print_progress = TRUE, parallel = FALSE) {
+ calibration_group_size = NULL, print_progress = TRUE, parallel = FALSE,
+ n_processors = "auto") {
# 0. advance function (if necessary), and check function call
sceptre_object <- skip_assign_grnas_and_run_qc(sceptre_object, parallel)
sceptre_object <- perform_status_check_and_update(sceptre_object, "run_calibration_check")
@@ -25,7 +27,7 @@ run_calibration_check <- function(sceptre_object, output_amount = 1, n_calibrati
if (is.null(n_calibration_pairs)) n_calibration_pairs <- sceptre_object@n_ok_discovery_pairs
# 2. check inputs
- check_calibration_check_inputs(sceptre_object, n_calibration_pairs) |> invisible()
+ check_calibration_check_inputs(sceptre_object, n_calibration_pairs, n_processors) |> invisible()
# 3. construct the negative control pairs
response_grna_group_pairs <- construct_negative_control_pairs_v2(sceptre_object = sceptre_object,
@@ -43,7 +45,8 @@ run_calibration_check <- function(sceptre_object, output_amount = 1, n_calibrati
analysis_type = "calibration_check",
output_amount = output_amount,
print_progress = print_progress,
- parallel = parallel)
+ parallel = parallel,
+ n_processors = n_processors)
# 6. update fields of sceptre object with results
sceptre_object@calibration_result <- out$result |> apply_grouping_to_result(sceptre_object, TRUE) |>
@@ -63,12 +66,13 @@ run_calibration_check <- function(sceptre_object, output_amount = 1, n_calibrati
#' @param output_amount (optional; default `1`) an integer taking values 1, 2, or 3 specifying the amount of information to return
#' @param print_progress (optional; default `TRUE`) a logical indicating whether to print progress updates
#' @param parallel (optional; default `FALSE`) a logical indicating whether to run the function in parallel
+#' @param n_processors (optional; default "auto") an integer specifying the number of processors to use if `parallel` is set to `TRUE`. The default, "auto," automatically detects the number of processors available on the machine.
#' @return an updated `sceptre_object` in which the power check has been carried out
#'
#' @export
#' @examples
#' # see example via ?sceptre
-run_power_check <- function(sceptre_object, output_amount = 1, print_progress = TRUE, parallel = FALSE) {
+run_power_check <- function(sceptre_object, output_amount = 1, print_progress = TRUE, parallel = FALSE, n_processors = "auto") {
# 0. verify that function called in correct order
sceptre_object <- skip_assign_grnas_and_run_qc(sceptre_object, parallel)
sceptre_object <- perform_status_check_and_update(sceptre_object, "run_power_check")
@@ -83,7 +87,8 @@ run_power_check <- function(sceptre_object, output_amount = 1, print_progress =
grna_target_data_frame = sceptre_object@grna_target_data_frame,
pc_analysis = TRUE,
calibration_result = sceptre_object@calibration_result,
- n_ok_pairs = sceptre_object@n_ok_positive_control_pairs) |> invisible()
+ n_ok_pairs = sceptre_object@n_ok_positive_control_pairs,
+ n_processors = n_processors) |> invisible()
# 3. run the sceptre analysis (high-level function call)
out <- run_sceptre_analysis_high_level(sceptre_object = sceptre_object,
@@ -92,7 +97,8 @@ run_power_check <- function(sceptre_object, output_amount = 1, print_progress =
analysis_type = "power_check",
output_amount = output_amount,
print_progress = print_progress,
- parallel = parallel)
+ parallel = parallel,
+ n_processors = n_processors)
# 4. update fields of sceptre object with results
sceptre_object@power_result <- out$result |> apply_grouping_to_result(sceptre_object)
@@ -109,12 +115,13 @@ run_power_check <- function(sceptre_object, output_amount = 1, print_progress =
#' @param output_amount (optional; default `1`) an integer taking values 1, 2, or 3 specifying the amount of information to return
#' @param print_progress (optional; default `TRUE`) a logical indicating whether to print progress updates
#' @param parallel (optional; default `FALSE`) a logical indicating whether to run the function in parallel
+#' @param n_processors (optional; default "auto") an integer specifying the number of processors to use if `parallel` is set to `TRUE`. The default, "auto," automatically detects the number of processors available on the machine.
#' @return an updated `sceptre_object` in which the discovery analysis has been carried out
#'
#' @export
#' @examples
#' # see example via ?sceptre
-run_discovery_analysis <- function(sceptre_object, output_amount = 1, print_progress = TRUE, parallel = FALSE) {
+run_discovery_analysis <- function(sceptre_object, output_amount = 1, print_progress = TRUE, parallel = FALSE, n_processors = "auto") {
# 0. verify that function called in correct order
sceptre_object <- skip_assign_grnas_and_run_qc(sceptre_object, parallel)
sceptre_object <- perform_status_check_and_update(sceptre_object, "run_discovery_analysis")
@@ -129,7 +136,8 @@ run_discovery_analysis <- function(sceptre_object, output_amount = 1, print_prog
grna_target_data_frame = sceptre_object@grna_target_data_frame,
pc_analysis = FALSE,
calibration_result = sceptre_object@calibration_result,
- n_ok_pairs = sceptre_object@n_ok_discovery_pairs) |> invisible()
+ n_ok_pairs = sceptre_object@n_ok_discovery_pairs,
+ n_processors = n_processors) |> invisible()
# 3. run the sceptre analysis (high-level function call)
out <- run_sceptre_analysis_high_level(sceptre_object = sceptre_object,
@@ -138,7 +146,8 @@ run_discovery_analysis <- function(sceptre_object, output_amount = 1, print_prog
analysis_type = "discovery_analysis",
output_amount = output_amount,
print_progress = print_progress,
- parallel = parallel)
+ parallel = parallel,
+ n_processors = n_processors)
# 4. update fields of sceptre object with results
sceptre_object@discovery_result <- out$result |> apply_grouping_to_result(sceptre_object) |>
@@ -149,7 +158,7 @@ run_discovery_analysis <- function(sceptre_object, output_amount = 1, print_prog
}
-run_sceptre_analysis_high_level <- function(sceptre_object, response_grna_group_pairs, calibration_check, analysis_type, output_amount, print_progress, parallel) {
+run_sceptre_analysis_high_level <- function(sceptre_object, response_grna_group_pairs, calibration_check, analysis_type, output_amount, print_progress, parallel, n_processors) {
# if running permutations, generate the permutation idxs
if (sceptre_object@run_permutations) {
cat("Generating permutation resamples.")
@@ -177,8 +186,8 @@ run_sceptre_analysis_high_level <- function(sceptre_object, response_grna_group_
n_nonzero_cntrl_thresh = sceptre_object@n_nonzero_cntrl_thresh,
side_code = sceptre_object@side_code, low_moi = sceptre_object@low_moi,
response_precomputations = sceptre_object@response_precomputations,
- cells_in_use = sceptre_object@cells_in_use,
- print_progress = print_progress, parallel = parallel, analysis_type = analysis_type)
+ cells_in_use = sceptre_object@cells_in_use, print_progress = print_progress,
+ parallel = parallel, n_processors = n_processors, analysis_type = analysis_type)
# run the method
out <- if (sceptre_object@run_permutations) {
diff --git a/docs/reference/assign_grnas.html b/docs/reference/assign_grnas.html
index d5b98c60..5ac80e0a 100644
--- a/docs/reference/assign_grnas.html
+++ b/docs/reference/assign_grnas.html
@@ -1,5 +1,5 @@
-Assign gRNAs to cells — assign_grnas • sceptreAssign gRNAs to cells — assign_grnas • sceptre
@@ -92,6 +92,7 @@
+
+
Examples
- Site built with pkgdown 2.0.5.
+ Site built with pkgdown 2.0.7.
diff --git a/docs/reference/import_data_from_parse.html b/docs/reference/import_data_from_parse.html
index 2f930c62..c8bd1c63 100644
--- a/docs/reference/import_data_from_parse.html
+++ b/docs/reference/import_data_from_parse.html
@@ -1,5 +1,5 @@
-Import data from Parse (experimental) — import_data_from_parse • sceptreImport data from Parse (experimental) — import_data_from_parse • sceptre
@@ -150,7 +150,7 @@ Details
diff --git a/docs/reference/index.html b/docs/reference/index.html
index 5a22caee..65c9ce92 100644
--- a/docs/reference/index.html
+++ b/docs/reference/index.html
@@ -1,5 +1,5 @@
-Function reference • sceptreFunction reference • sceptre
@@ -295,7 +295,7 @@ Data
diff --git a/docs/reference/run_calibration_check.html b/docs/reference/run_calibration_check.html
index 2f95c86d..5d96487f 100644
--- a/docs/reference/run_calibration_check.html
+++ b/docs/reference/run_calibration_check.html
@@ -1,5 +1,5 @@
-Run calibration check — run_calibration_check • sceptreRun calibration check — run_calibration_check • sceptre
@@ -93,7 +93,8 @@ Usage
n_calibration_pairs = NULL,
calibration_group_size = NULL,
print_progress = TRUE,
- parallel = FALSE
+ parallel = FALSE,
+ n_processors = "auto"
)
@@ -122,6 +123,10 @@ Argumentsparallel is set to TRUE
. The default, "auto," automatically detects the number of processors available on the machine.
+
diff --git a/docs/reference/run_discovery_analysis.html b/docs/reference/run_discovery_analysis.html
index 508b673e..4c4fa78c 100644
--- a/docs/reference/run_discovery_analysis.html
+++ b/docs/reference/run_discovery_analysis.html
@@ -1,5 +1,5 @@
-Run discovery analysis — run_discovery_analysis • sceptreRun discovery analysis — run_discovery_analysis • sceptre
@@ -91,7 +91,8 @@ Usage
sceptre_object,
output_amount = 1,
print_progress = TRUE,
- parallel = FALSE
+ parallel = FALSE,
+ n_processors = "auto"
)
@@ -112,6 +113,10 @@ Argumentsparallel is set to TRUE
. The default, "auto," automatically detects the number of processors available on the machine.
+
diff --git a/docs/reference/run_power_check.html b/docs/reference/run_power_check.html
index 9f9b810c..5c7e204b 100644
--- a/docs/reference/run_power_check.html
+++ b/docs/reference/run_power_check.html
@@ -1,5 +1,5 @@
-Run power check — run_power_check • sceptreRun power check — run_power_check • sceptre
@@ -91,7 +91,8 @@ Usage
sceptre_object,
output_amount = 1,
print_progress = TRUE,
- parallel = FALSE
+ parallel = FALSE,
+ n_processors = "auto"
)
@@ -112,6 +113,10 @@ Argumentsparallel is set to TRUE
. The default, "auto," automatically detects the number of processors available on the machine.
+
diff --git a/man/run_calibration_check.Rd b/man/run_calibration_check.Rd
index 9469bc41..ce15e6a5 100644
--- a/man/run_calibration_check.Rd
+++ b/man/run_calibration_check.Rd
@@ -10,7 +10,8 @@ run_calibration_check(
n_calibration_pairs = NULL,
calibration_group_size = NULL,
print_progress = TRUE,
- parallel = FALSE
+ parallel = FALSE,
+ n_processors = "auto"
)
}
\arguments{
@@ -25,6 +26,8 @@ run_calibration_check(
\item{print_progress}{(optional; default \code{TRUE}) a logical indicating whether to print progress updates}
\item{parallel}{(optional; default \code{FALSE}) a logical indicating whether to run the function in parallel}
+
+\item{n_processors}{(optional; default "auto") an integer specifying the number of processors to use if \code{parallel} is set to \code{TRUE}. The default, "auto," automatically detects the number of processors available on the machine.}
}
\value{
an updated \code{sceptre_object} in which the calibration check has been carried out
diff --git a/man/run_discovery_analysis.Rd b/man/run_discovery_analysis.Rd
index d9a5b865..c5e3ee17 100644
--- a/man/run_discovery_analysis.Rd
+++ b/man/run_discovery_analysis.Rd
@@ -8,7 +8,8 @@ run_discovery_analysis(
sceptre_object,
output_amount = 1,
print_progress = TRUE,
- parallel = FALSE
+ parallel = FALSE,
+ n_processors = "auto"
)
}
\arguments{
@@ -19,6 +20,8 @@ run_discovery_analysis(
\item{print_progress}{(optional; default \code{TRUE}) a logical indicating whether to print progress updates}
\item{parallel}{(optional; default \code{FALSE}) a logical indicating whether to run the function in parallel}
+
+\item{n_processors}{(optional; default "auto") an integer specifying the number of processors to use if \code{parallel} is set to \code{TRUE}. The default, "auto," automatically detects the number of processors available on the machine.}
}
\value{
an updated \code{sceptre_object} in which the discovery analysis has been carried out
diff --git a/man/run_power_check.Rd b/man/run_power_check.Rd
index c7300194..580dac18 100644
--- a/man/run_power_check.Rd
+++ b/man/run_power_check.Rd
@@ -8,7 +8,8 @@ run_power_check(
sceptre_object,
output_amount = 1,
print_progress = TRUE,
- parallel = FALSE
+ parallel = FALSE,
+ n_processors = "auto"
)
}
\arguments{
@@ -19,6 +20,8 @@ run_power_check(
\item{print_progress}{(optional; default \code{TRUE}) a logical indicating whether to print progress updates}
\item{parallel}{(optional; default \code{FALSE}) a logical indicating whether to run the function in parallel}
+
+\item{n_processors}{(optional; default "auto") an integer specifying the number of processors to use if \code{parallel} is set to \code{TRUE}. The default, "auto," automatically detects the number of processors available on the machine.}
}
\value{
an updated \code{sceptre_object} in which the power check has been carried out
From 8e1022d2ce95a1f7be9a579b9d6803c582d20273 Mon Sep 17 00:00:00 2001
From: Tim
Date: Mon, 6 Nov 2023 15:49:03 -0500
Subject: [PATCH 3/9] further accelerate plot_assign_grnas
---
R/plotting_functions.R | 34 ++++++++++++++++------------------
R/s4_analysis_functs_1.R | 3 ++-
R/sceptre.R | 4 ++--
man/sceptre.Rd | 4 ++--
4 files changed, 22 insertions(+), 23 deletions(-)
diff --git a/R/plotting_functions.R b/R/plotting_functions.R
index e09d168f..40a98bbc 100644
--- a/R/plotting_functions.R
+++ b/R/plotting_functions.R
@@ -129,7 +129,6 @@ plot_grna_count_distributions <- function(sceptre_object, n_grnas_to_plot = 4L,
#' @param grnas_to_plot (optional; default \code{NULL}) a character vector giving the names of specific gRNAs to plot; if \code{NULL}, then the gRNAs are chosen at random.
#' @param point_size (optional; default \code{0.9}) the size of the individual points in the plot
#' @param transparency (optional; default \code{0.8}) the transparency of the individual points in the plot
-#' @param n_max_0_grna_unprtb_plot (optional; default \code{1000}) there may be many cells with a gRNA count of 0 in the unperturbed group. This can slow down the plotting without adding useful information, so at most \code{n_max_0_grna_unprtb_plot} points from this group are plotted. Setting this to \code{Inf} will guarantee no downsampling occurs.
#' @param return_indiv_plots (optional; default \code{FALSE}) if \code{FALSE}, then a list of \code{ggplot} objects is returned; if \code{TRUE} then a single \code{cowplot} object is returned.
#'
#' @return a single \code{cowplot} object containing the combined panels (if \code{return_indiv_plots} is set to \code{TRUE}) or a list of the individual panels (if \code{return_indiv_plots} is set to \code{FALSE})
@@ -139,7 +138,9 @@ plot_grna_count_distributions <- function(sceptre_object, n_grnas_to_plot = 4L,
#' # `plot_assign_grnas()` is dispatched when
#' # `plot()` is called on the `sceptre_object`
#' # in step 3 (the gRNA assignment step).
-plot_assign_grnas <- function(sceptre_object, n_grnas_to_plot = 3L, grnas_to_plot = NULL, point_size = 0.9, transparency = 0.8, n_max_0_grna_unprtb_plot = 1000, return_indiv_plots = FALSE) {
+plot_assign_grnas <- function(sceptre_object, n_grnas_to_plot = 3L, grnas_to_plot = NULL, point_size = 0.9, transparency = 0.8, return_indiv_plots = FALSE) {
+ n_points_to_plot_per_umi <- 1000
+ n_grnas_to_plot_panel_b <- 1000
if (!sceptre_object@functs_called["assign_grnas"]) {
stop("This `sceptre_object` has not yet had `assign_grnas` called on it.")
}
@@ -161,7 +162,7 @@ plot_assign_grnas <- function(sceptre_object, n_grnas_to_plot = 3L, grnas_to_plo
p = grna_matrix@p,
x = grna_matrix@x,
row_idx = which(grna_id == rownames(grna_matrix)),
- n_cells = ncol(grna_matrix))
+ n_cells = ncol(grna_matrix)) |> as.integer()
df <- data.frame(g = g,
assignment = ifelse(assignment, "pert", "unpert") |> factor(),
grna_id = grna_id |> factor(),
@@ -171,14 +172,11 @@ plot_assign_grnas <- function(sceptre_object, n_grnas_to_plot = 3L, grnas_to_plo
return(df)
}) |> data.table::rbindlist()
- # downsampling the unperturbed cells with 0 grna expression, if the number of those cells
- # exceeds `n_max_0_grna_unprtb_plot`.
- is_0_grna_and_unperturbed <- with(to_plot_a, assignment == "unperturbed" & g == 0)
- if (sum(is_0_grna_and_unperturbed) > n_max_0_grna_unprtb_plot) {
- idx_to_remove <- rownames(to_plot_a)[is_0_grna_and_unperturbed] |>
- sample(sum(is_0_grna_and_unperturbed) - n_max_0_grna_unprtb_plot)
- to_plot_a <- to_plot_a[! rownames(to_plot_a) %in% idx_to_remove, ]
- }
+ # downsample the unperturbed cells
+ to_plot_a <- to_plot_a |>
+ dplyr::group_by(g) |>
+ dplyr::sample_n(size = min(n_points_to_plot_per_umi, dplyr::n())) |>
+ dplyr::ungroup()
# plot a
p_a <- ggplot2::ggplot(data = to_plot_a, mapping = ggplot2::aes(x = assignment, y = g, col = assignment)) +
@@ -198,7 +196,8 @@ plot_assign_grnas <- function(sceptre_object, n_grnas_to_plot = 3L, grnas_to_plo
to_plot_b <- data.frame(x = n_cells_per_grna,
y = names(n_cells_per_grna)) |>
dplyr::arrange(n_cells_per_grna) |>
- dplyr::mutate(y = factor(y, labels = y, levels = y))
+ dplyr::mutate(y = factor(y, labels = y, levels = y)) |>
+ dplyr::sample_n(min(n_grnas_to_plot_panel_b, dplyr::n()))
p_b <- ggplot2::ggplot(data = to_plot_b,
mapping = ggplot2::aes(x = x, y = y)) +
ggplot2::geom_bar(stat = "identity", width = 0.5, fill = "grey90", col = "darkblue") +
@@ -218,12 +217,11 @@ plot_assign_grnas <- function(sceptre_object, n_grnas_to_plot = 3L, grnas_to_plo
} else {
n_grnas_per_cell <- sceptre_object@grnas_per_cell
moi <- mean(n_grnas_per_cell)
- to_plot_c <- data.frame(x = n_grnas_per_cell)
- p_c <- ggplot2::ggplot(data = to_plot_c, mapping = ggplot2::aes(x = x)) +
- ggplot2::geom_histogram(binwidth = 1, fill = "grey90", color = "darkblue") +
- ggplot2::scale_y_continuous(trans = "log1p",
- expand = c(0, 0),
- breaks = 10^(1:8)) +
+ p_c <- ggplot2::ggplot(data = data.frame(x = n_grnas_per_cell),
+ mapping = ggplot2::aes(x = x)) +
+ ggplot2::geom_histogram(binwidth = max(1, 0.02 * length(unique(n_grnas_per_cell))),
+ fill = "grey90", color = "darkblue") +
+ ggplot2::scale_y_continuous(expand = c(0, 0), trans = "log1p", breaks = 10^(1:8)) +
get_my_theme() + ggplot2::ylab("Frequency") +
ggplot2::ggtitle("N gRNAs per cell") +
ggplot2::xlab("N gRNAs") +
diff --git a/R/s4_analysis_functs_1.R b/R/s4_analysis_functs_1.R
index 5f558304..8bd37865 100644
--- a/R/s4_analysis_functs_1.R
+++ b/R/s4_analysis_functs_1.R
@@ -23,8 +23,9 @@ import_data <- function(response_matrix, grna_matrix, grna_target_data_frame, mo
extra_covariates = extra_covariates,
response_names = if (identical(response_names, NA_character_)) rownames(response_matrix) else response_names)
- # 3. make the response matrix row accessible
+ # 3. make the response and grna matrices row accessible
response_matrix <- set_matrix_accessibility(response_matrix, make_row_accessible = TRUE)
+ grna_matrix <- set_matrix_accessibility(grna_matrix, make_row_accessible = TRUE)
# 4. update fields in output object and return
sceptre_object <- methods::new("sceptre_object")
diff --git a/R/sceptre.R b/R/sceptre.R
index 15149568..c51b16f2 100644
--- a/R/sceptre.R
+++ b/R/sceptre.R
@@ -47,7 +47,7 @@ utils::globalVariables(c("n_nonzero_trt", "n_nonzero_cntrl", "pair_str", "assign
#' print(sceptre_object)
#'
#' # 4. run qc
-#' plot_covariates(sceptre_object)
+#' plot_covariates(sceptre_object, p_mito_threshold = 0.075)
#' sceptre_object <- sceptre_object |> run_qc(p_mito_threshold = 0.075)
#' plot(sceptre_object)
#' print(sceptre_object)
@@ -102,7 +102,7 @@ utils::globalVariables(c("n_nonzero_trt", "n_nonzero_cntrl", "pair_str", "assign
#' print(sceptre_object)
#'
#' # 4. run qc
-#' plot_covariates(sceptre_object)
+#' plot_covariates(sceptre_object, p_mito_threshold = 0.075)
#' sceptre_object <- sceptre_object |> run_qc(p_mito_threshold = 0.075)
#' plot(sceptre_object)
#' print(sceptre_object)
diff --git a/man/sceptre.Rd b/man/sceptre.Rd
index 2072eaaf..a23de1b7 100644
--- a/man/sceptre.Rd
+++ b/man/sceptre.Rd
@@ -41,7 +41,7 @@ plot(sceptre_object)
print(sceptre_object)
# 4. run qc
-plot_covariates(sceptre_object)
+plot_covariates(sceptre_object, p_mito_threshold = 0.075)
sceptre_object <- sceptre_object |> run_qc(p_mito_threshold = 0.075)
plot(sceptre_object)
print(sceptre_object)
@@ -96,7 +96,7 @@ plot(sceptre_object)
print(sceptre_object)
# 4. run qc
-plot_covariates(sceptre_object)
+plot_covariates(sceptre_object, p_mito_threshold = 0.075)
sceptre_object <- sceptre_object |> run_qc(p_mito_threshold = 0.075)
plot(sceptre_object)
print(sceptre_object)
From 60ac7b665875dce686bf4716bef324e23896b5b0 Mon Sep 17 00:00:00 2001
From: Tim
Date: Mon, 6 Nov 2023 16:02:24 -0500
Subject: [PATCH 4/9] update test to reflect storage of grna matrix as dgr
matrix
---
man/plot_assign_grnas.Rd | 3 ---
tests/testthat/test-s4_analysis_functs_1.R | 4 ++--
2 files changed, 2 insertions(+), 5 deletions(-)
diff --git a/man/plot_assign_grnas.Rd b/man/plot_assign_grnas.Rd
index 703c6e58..6680cd9d 100644
--- a/man/plot_assign_grnas.Rd
+++ b/man/plot_assign_grnas.Rd
@@ -10,7 +10,6 @@ plot_assign_grnas(
grnas_to_plot = NULL,
point_size = 0.9,
transparency = 0.8,
- n_max_0_grna_unprtb_plot = 1000,
return_indiv_plots = FALSE
)
}
@@ -25,8 +24,6 @@ plot_assign_grnas(
\item{transparency}{(optional; default \code{0.8}) the transparency of the individual points in the plot}
-\item{n_max_0_grna_unprtb_plot}{(optional; default \code{1000}) there may be many cells with a gRNA count of 0 in the unperturbed group. This can slow down the plotting without adding useful information, so at most \code{n_max_0_grna_unprtb_plot} points from this group are plotted. Setting this to \code{Inf} will guarantee no downsampling occurs.}
-
\item{return_indiv_plots}{(optional; default \code{FALSE}) if \code{FALSE}, then a list of \code{ggplot} objects is returned; if \code{TRUE} then a single \code{cowplot} object is returned.}
}
\value{
diff --git a/tests/testthat/test-s4_analysis_functs_1.R b/tests/testthat/test-s4_analysis_functs_1.R
index 4d8c8e98..3448daec 100644
--- a/tests/testthat/test-s4_analysis_functs_1.R
+++ b/tests/testthat/test-s4_analysis_functs_1.R
@@ -6,7 +6,7 @@ test_that("import_data", {
num_responses <- 17
grna_target_data_frame <- make_mock_grna_target_data(c(1,3,1), 1, 1, 6)
grna_matrix <- matrix(rpois(nrow(grna_target_data_frame) * num_cells, 1), ncol = num_cells) |>
- `rownames<-`(grna_target_data_frame$grna_id)
+ `rownames<-`(grna_target_data_frame$grna_id) |> set_matrix_accessibility()
response_matrix <- matrix(rpois(num_responses * num_cells, 1), ncol = num_cells) |>
as("TsparseMatrix")
extra_covariates <- data.frame(x = rep("aaa", num_cells))
@@ -50,7 +50,7 @@ test_that("import_data", {
##### slots set in section 4 of `import_data`
expect_equal(sceptre_object_high_with_ec@response_matrix, set_matrix_accessibility(response_matrix))
- expect_equal(sceptre_object_high_with_ec@grna_matrix, grna_matrix)
+ expect_equal(sceptre_object_high_with_ec@grna_matrix, set_matrix_accessibility(grna_matrix))
expect_equal(sceptre_object_low_no_ec_with_response_names@response_names, paste0("rrr", 1:num_responses))
expect_true(sceptre_object_low_no_ec_with_response_names@low_moi)
From 838f276fd578e0882b72a788ac1f2d9788357352 Mon Sep 17 00:00:00 2001
From: Tim
Date: Mon, 6 Nov 2023 18:51:01 -0500
Subject: [PATCH 5/9] delete only the outputs of write_results in write_results
---
R/s4_analysis_functs_2.R | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/R/s4_analysis_functs_2.R b/R/s4_analysis_functs_2.R
index 8d80803b..b4721e46 100644
--- a/R/s4_analysis_functs_2.R
+++ b/R/s4_analysis_functs_2.R
@@ -280,8 +280,10 @@ get_result <- function(sceptre_object, analysis) {
write_outputs_to_directory <- function(sceptre_object, directory) {
# 0. create directory
if (!dir.exists(directory)) dir.create(path = directory, recursive = TRUE)
- fs <- list.files(path = directory, full.names = TRUE)
- for (f in fs) file.remove(f)
+ fs <- paste0(directory, "/", c("analysis_summary.txt", "plot_assign_grnas.png", "plot_run_qc.png",
+ "plot_run_calibration_check.png", "plot_run_power_check.png", "plot_run_discovery_analysis.png",
+ "results_run_calibration_check.rds", "results_run_power_check.rds", "results_run_discovery_analysis.rds"))
+ for (f in fs) if (file.exists(f)) file.remove(f)
# 1. create analysis_summary.txt file
summary_file_fp <- paste0(directory, "/analysis_summary.txt")
From 7ec6c9223a03c78c1ecd64d5a5f595e4e80c602e Mon Sep 17 00:00:00 2001
From: Tim
Date: Tue, 7 Nov 2023 00:01:29 -0500
Subject: [PATCH 6/9] add log_dir arg to parallel functs
---
R/assign_grna_functions.R | 5 +++--
R/aux_functions.R | 4 ++--
R/medium_level_functs_v2.R | 14 +++++++-------
R/mixture_model_functs.R | 6 +++---
R/s4_analysis_functs_1.R | 5 +++--
R/s4_analysis_functs_2.R | 23 +++++++++++++++--------
6 files changed, 33 insertions(+), 24 deletions(-)
diff --git a/R/assign_grna_functions.R b/R/assign_grna_functions.R
index af1dc635..2b073575 100644
--- a/R/assign_grna_functions.R
+++ b/R/assign_grna_functions.R
@@ -1,4 +1,4 @@
-assign_grnas_to_cells <- function(sceptre_object, print_progress, parallel, n_processors) {
+assign_grnas_to_cells <- function(sceptre_object, print_progress, parallel, n_processors, log_dir) {
# extract pieces from sceptre_object
grna_matrix <- sceptre_object@grna_matrix
grna_target_data_frame <- sceptre_object@grna_target_data_frame
@@ -13,7 +13,8 @@ assign_grnas_to_cells <- function(sceptre_object, print_progress, parallel, n_pr
if (grna_assignment_method == "mixture") {
initial_assignment_list <- assign_grnas_to_cells_mixture(grna_matrix = grna_matrix, cell_covariate_data_frame = cell_covariate_data_frame,
grna_assignment_hyperparameters = grna_assignment_hyperparameters,
- print_progress = print_progress, parallel = parallel, n_processors = n_processors)
+ print_progress = print_progress, parallel = parallel, n_processors = n_processors,
+ log_dir = log_dir)
}
if (grna_assignment_method == "thresholding") {
initial_assignment_list <- assign_grnas_to_cells_thresholding(grna_matrix = grna_matrix,
diff --git a/R/aux_functions.R b/R/aux_functions.R
index f5e7a1a8..2e0e5eb2 100644
--- a/R/aux_functions.R
+++ b/R/aux_functions.R
@@ -78,8 +78,8 @@ partition_response_ids <- function(response_ids, parallel, n_processors) {
}
-get_log_dir <- function() {
- log_dir <- paste0(tempdir(), "/sceptre_logs/")
+get_log_dir <- function(log_dir) {
+ log_dir <- paste0(log_dir, "/sceptre_logs/")
if (!dir.exists(log_dir)) dir.create(log_dir, recursive = TRUE)
return(log_dir)
}
diff --git a/R/medium_level_functs_v2.R b/R/medium_level_functs_v2.R
index 1c311896..29e2c017 100644
--- a/R/medium_level_functs_v2.R
+++ b/R/medium_level_functs_v2.R
@@ -16,7 +16,7 @@ run_perm_test_in_memory <- function(response_matrix, grna_assignments, covariate
synthetic_idxs, output_amount, fit_parametric_curve, B1, B2, B3, calibration_check,
control_group_complement, n_nonzero_trt_thresh, n_nonzero_cntrl_thresh,
side_code, low_moi, response_precomputations, cells_in_use, print_progress,
- parallel, n_processors, analysis_type) {
+ parallel, n_processors, log_dir, analysis_type) {
# 0. define several variables
subset_to_nt_cells <- calibration_check && !control_group_complement
run_outer_regression <- calibration_check || control_group_complement
@@ -34,7 +34,7 @@ run_perm_test_in_memory <- function(response_matrix, grna_assignments, covariate
# 2. define function to loop subset of response IDs
analyze_given_response_ids <- function(curr_response_ids, proc_id = NULL) {
if (parallel && print_progress) {
- f_name <- paste0(get_log_dir(), analysis_type, "_", proc_id, ".out")
+ f_name <- paste0(get_log_dir(log_dir), analysis_type, "_", proc_id, ".out")
file.create(f_name) |> invisible()
} else {
f_name <- NULL
@@ -104,7 +104,7 @@ run_perm_test_in_memory <- function(response_matrix, grna_assignments, covariate
res <- lapply(partitioned_response_ids, analyze_given_response_ids)
} else {
cat(paste0("Running ", analysis_type, " in parallel. "))
- if (print_progress) cat(paste0("Change directories to ", crayon::blue(get_log_dir()), " and view the files ", crayon::blue(paste0(analysis_type, "_*.out")), " for progress updates.\n"))
+ if (print_progress) cat(paste0("Change directories to ", crayon::blue(get_log_dir(log_dir)), " and view the files ", crayon::blue(paste0(analysis_type, "_*.out")), " for progress updates.\n"))
res <- parallel::mclapply(seq_along(partitioned_response_ids),
function(proc_id) analyze_given_response_ids(partitioned_response_ids[[proc_id]], proc_id),
mc.cores = length(partitioned_response_ids))
@@ -124,7 +124,7 @@ run_perm_test_in_memory <- function(response_matrix, grna_assignments, covariate
run_crt_in_memory_v2 <- function(response_matrix, grna_assignments, covariate_matrix, response_grna_group_pairs,
output_amount, fit_parametric_curve, B1, B2, B3, calibration_check, control_group_complement,
n_nonzero_trt_thresh, n_nonzero_cntrl_thresh, side_code, low_moi, response_precomputations,
- cells_in_use, print_progress, parallel, n_processors, analysis_type) {
+ cells_in_use, print_progress, parallel, n_processors, log_dir, analysis_type) {
# 0. define several variables
subset_to_nt_cells <- calibration_check && !control_group_complement
run_outer_regression <- calibration_check || control_group_complement
@@ -142,7 +142,7 @@ run_crt_in_memory_v2 <- function(response_matrix, grna_assignments, covariate_ma
# 2. define the run precomputation function
run_precomp_on_given_responses <- function(curr_response_ids, proc_id = NULL) {
if (parallel && print_progress) {
- f_name <- paste0(get_log_dir(), analysis_type, "_", proc_id, ".out")
+ f_name <- paste0(get_log_dir(log_dir), analysis_type, "_", proc_id, ".out")
file.create(f_name) |> invisible()
} else {
f_name <- NULL
@@ -183,7 +183,7 @@ run_crt_in_memory_v2 <- function(response_matrix, grna_assignments, covariate_ma
res <- lapply(partitioned_response_ids, run_precomp_on_given_responses)
} else {
cat(paste0("Running ", analysis_type, " in parallel. "))
- if (print_progress) cat(paste0("Change directories to ", crayon::blue(get_log_dir()), " and view the files ", crayon::blue(paste0(analysis_type, "_*.out")), " for progress updates. "))
+ if (print_progress) cat(paste0("Change directories to ", crayon::blue(get_log_dir(log_dir)), " and view the files ", crayon::blue(paste0(analysis_type, "_*.out")), " for progress updates. "))
res <- parallel::mclapply(seq_along(partitioned_response_ids),
function(proc_id) run_precomp_on_given_responses(partitioned_response_ids[[proc_id]], proc_id),
mc.cores = length(partitioned_response_ids))
@@ -195,7 +195,7 @@ run_crt_in_memory_v2 <- function(response_matrix, grna_assignments, covariate_ma
# 6. define the function to analyze the pairs
perform_association_analysis <- function(curr_grna_groups, proc_id) {
result_out_list <- vector(mode = "list", length = length(curr_grna_groups))
- f_name <- if (parallel && print_progress) paste0(get_log_dir(), analysis_type, "_", proc_id, ".out") else NULL
+ f_name <- if (parallel && print_progress) paste0(get_log_dir(log_dir), analysis_type, "_", proc_id, ".out") else NULL
for (grna_group_idx in seq_along(curr_grna_groups)) {
curr_grna_group <- get_id_from_idx(grna_group_idx, print_progress, curr_grna_groups,
diff --git a/R/mixture_model_functs.R b/R/mixture_model_functs.R
index 046aa501..c20dc6a1 100644
--- a/R/mixture_model_functs.R
+++ b/R/mixture_model_functs.R
@@ -1,4 +1,4 @@
-assign_grnas_to_cells_mixture <- function(grna_matrix, cell_covariate_data_frame, grna_assignment_hyperparameters, print_progress, parallel, n_processors) {
+assign_grnas_to_cells_mixture <- function(grna_matrix, cell_covariate_data_frame, grna_assignment_hyperparameters, print_progress, parallel, n_processors, log_dir) {
if (!parallel) cat(crayon::red("Note: Set `parallel = TRUE` in the function call to improve speed.\n\n"))
# 0. get random starting guesses for pi and g_pert
starting_guesses <- get_random_starting_guesses(n_em_rep = grna_assignment_hyperparameters$n_em_rep,
@@ -16,7 +16,7 @@ assign_grnas_to_cells_mixture <- function(grna_matrix, cell_covariate_data_frame
# 3. define the function to perform assignments for a set of grnas
analyze_given_grna_ids <- function(curr_grna_ids, proc_id = NULL) {
if (parallel && print_progress) {
- f_name <- paste0(get_log_dir(), "assign_grnas_", proc_id, ".out")
+ f_name <- paste0(get_log_dir(log_dir), "assign_grnas_", proc_id, ".out")
file.create(f_name) |> invisible()
} else {
f_name <- NULL
@@ -42,7 +42,7 @@ assign_grnas_to_cells_mixture <- function(grna_matrix, cell_covariate_data_frame
} else {
cat("Running gRNA assignments in parallel. ")
if (print_progress) {
- cat(paste0("Change directories to ", crayon::blue(get_log_dir()), " and view the files ",
+ cat(paste0("Change directories to ", crayon::blue(get_log_dir(log_dir)), " and view the files ",
crayon::blue("assign_grnas_*.out"), " for progress updates.\n"))
}
grna_ids_partitioned <- partition_response_ids(grna_ids, parallel, n_processors)
diff --git a/R/s4_analysis_functs_1.R b/R/s4_analysis_functs_1.R
index 8bd37865..e6dc7ba1 100644
--- a/R/s4_analysis_functs_1.R
+++ b/R/s4_analysis_functs_1.R
@@ -162,7 +162,8 @@ set_analysis_parameters <- function(sceptre_object,
#' @export
#' @examples
#' # see example via ?sceptre
-assign_grnas <- function(sceptre_object, method = "default", print_progress = TRUE, parallel = FALSE, n_processors = "auto", ...) {
+assign_grnas <- function(sceptre_object, method = "default", print_progress = TRUE, parallel = FALSE,
+ n_processors = "auto", log_dir = tempdir(), ...) {
# 0. verify that function called in correct order
sceptre_object <- perform_status_check_and_update(sceptre_object, "assign_grnas")
@@ -200,7 +201,7 @@ assign_grnas <- function(sceptre_object, method = "default", print_progress = TR
sceptre_object@grna_assignment_hyperparameters <- hyperparameters
# 5. assign the grnas
- sceptre_object <- assign_grnas_to_cells(sceptre_object, print_progress, parallel, n_processors)
+ sceptre_object <- assign_grnas_to_cells(sceptre_object, print_progress, parallel, n_processors, log_dir)
# return
return(sceptre_object)
diff --git a/R/s4_analysis_functs_2.R b/R/s4_analysis_functs_2.R
index b4721e46..73c47251 100644
--- a/R/s4_analysis_functs_2.R
+++ b/R/s4_analysis_functs_2.R
@@ -16,7 +16,7 @@
#' # see example via ?sceptre
run_calibration_check <- function(sceptre_object, output_amount = 1, n_calibration_pairs = NULL,
calibration_group_size = NULL, print_progress = TRUE, parallel = FALSE,
- n_processors = "auto") {
+ n_processors = "auto", log_dir = tempdir()) {
# 0. advance function (if necessary), and check function call
sceptre_object <- skip_assign_grnas_and_run_qc(sceptre_object, parallel)
sceptre_object <- perform_status_check_and_update(sceptre_object, "run_calibration_check")
@@ -46,7 +46,8 @@ run_calibration_check <- function(sceptre_object, output_amount = 1, n_calibrati
output_amount = output_amount,
print_progress = print_progress,
parallel = parallel,
- n_processors = n_processors)
+ n_processors = n_processors,
+ log_dir = log_dir)
# 6. update fields of sceptre object with results
sceptre_object@calibration_result <- out$result |> apply_grouping_to_result(sceptre_object, TRUE) |>
@@ -72,7 +73,8 @@ run_calibration_check <- function(sceptre_object, output_amount = 1, n_calibrati
#' @export
#' @examples
#' # see example via ?sceptre
-run_power_check <- function(sceptre_object, output_amount = 1, print_progress = TRUE, parallel = FALSE, n_processors = "auto") {
+run_power_check <- function(sceptre_object, output_amount = 1, print_progress = TRUE, parallel = FALSE,
+ n_processors = "auto", log_dir = tempdir()) {
# 0. verify that function called in correct order
sceptre_object <- skip_assign_grnas_and_run_qc(sceptre_object, parallel)
sceptre_object <- perform_status_check_and_update(sceptre_object, "run_power_check")
@@ -98,7 +100,8 @@ run_power_check <- function(sceptre_object, output_amount = 1, print_progress =
output_amount = output_amount,
print_progress = print_progress,
parallel = parallel,
- n_processors = n_processors)
+ n_processors = n_processors,
+ log_dir = log_dir)
# 4. update fields of sceptre object with results
sceptre_object@power_result <- out$result |> apply_grouping_to_result(sceptre_object)
@@ -121,7 +124,8 @@ run_power_check <- function(sceptre_object, output_amount = 1, print_progress =
#' @export
#' @examples
#' # see example via ?sceptre
-run_discovery_analysis <- function(sceptre_object, output_amount = 1, print_progress = TRUE, parallel = FALSE, n_processors = "auto") {
+run_discovery_analysis <- function(sceptre_object, output_amount = 1, print_progress = TRUE, parallel = FALSE,
+ n_processors = "auto", log_dir = tempdir()) {
# 0. verify that function called in correct order
sceptre_object <- skip_assign_grnas_and_run_qc(sceptre_object, parallel)
sceptre_object <- perform_status_check_and_update(sceptre_object, "run_discovery_analysis")
@@ -147,7 +151,8 @@ run_discovery_analysis <- function(sceptre_object, output_amount = 1, print_prog
output_amount = output_amount,
print_progress = print_progress,
parallel = parallel,
- n_processors = n_processors)
+ n_processors = n_processors,
+ log_dir = log_dir)
# 4. update fields of sceptre object with results
sceptre_object@discovery_result <- out$result |> apply_grouping_to_result(sceptre_object) |>
@@ -158,7 +163,8 @@ run_discovery_analysis <- function(sceptre_object, output_amount = 1, print_prog
}
-run_sceptre_analysis_high_level <- function(sceptre_object, response_grna_group_pairs, calibration_check, analysis_type, output_amount, print_progress, parallel, n_processors) {
+run_sceptre_analysis_high_level <- function(sceptre_object, response_grna_group_pairs, calibration_check, analysis_type, output_amount,
+ print_progress, parallel, n_processors, log_dir) {
# if running permutations, generate the permutation idxs
if (sceptre_object@run_permutations) {
cat("Generating permutation resamples.")
@@ -187,7 +193,8 @@ run_sceptre_analysis_high_level <- function(sceptre_object, response_grna_group_
side_code = sceptre_object@side_code, low_moi = sceptre_object@low_moi,
response_precomputations = sceptre_object@response_precomputations,
cells_in_use = sceptre_object@cells_in_use, print_progress = print_progress,
- parallel = parallel, n_processors = n_processors, analysis_type = analysis_type)
+ parallel = parallel, n_processors = n_processors, log_dir = log_dir,
+ analysis_type = analysis_type)
# run the method
out <- if (sceptre_object@run_permutations) {
From d830d93ac1e46402a52cc11e46e69e10a3141f60 Mon Sep 17 00:00:00 2001
From: Tim
Date: Sun, 19 Nov 2023 22:47:08 -0500
Subject: [PATCH 7/9] update docs
---
man/assign_grnas.Rd | 1 +
man/run_calibration_check.Rd | 3 ++-
man/run_discovery_analysis.Rd | 3 ++-
man/run_power_check.Rd | 3 ++-
4 files changed, 7 insertions(+), 3 deletions(-)
diff --git a/man/assign_grnas.Rd b/man/assign_grnas.Rd
index 72d627eb..797d3421 100644
--- a/man/assign_grnas.Rd
+++ b/man/assign_grnas.Rd
@@ -10,6 +10,7 @@ assign_grnas(
print_progress = TRUE,
parallel = FALSE,
n_processors = "auto",
+ log_dir = tempdir(),
...
)
}
diff --git a/man/run_calibration_check.Rd b/man/run_calibration_check.Rd
index ce15e6a5..2efa8348 100644
--- a/man/run_calibration_check.Rd
+++ b/man/run_calibration_check.Rd
@@ -11,7 +11,8 @@ run_calibration_check(
calibration_group_size = NULL,
print_progress = TRUE,
parallel = FALSE,
- n_processors = "auto"
+ n_processors = "auto",
+ log_dir = tempdir()
)
}
\arguments{
diff --git a/man/run_discovery_analysis.Rd b/man/run_discovery_analysis.Rd
index c5e3ee17..e1b7a6fc 100644
--- a/man/run_discovery_analysis.Rd
+++ b/man/run_discovery_analysis.Rd
@@ -9,7 +9,8 @@ run_discovery_analysis(
output_amount = 1,
print_progress = TRUE,
parallel = FALSE,
- n_processors = "auto"
+ n_processors = "auto",
+ log_dir = tempdir()
)
}
\arguments{
diff --git a/man/run_power_check.Rd b/man/run_power_check.Rd
index 580dac18..eb4d6126 100644
--- a/man/run_power_check.Rd
+++ b/man/run_power_check.Rd
@@ -9,7 +9,8 @@ run_power_check(
output_amount = 1,
print_progress = TRUE,
parallel = FALSE,
- n_processors = "auto"
+ n_processors = "auto",
+ log_dir = tempdir()
)
}
\arguments{
From b51b980e7f92b4c1d4a158dd8b4ffd84f44b5a37 Mon Sep 17 00:00:00 2001
From: Tim Barry
Date: Fri, 1 Dec 2023 16:02:55 -0500
Subject: [PATCH 8/9] update comments in pairwise_qc.cpp
---
src/pairwise_qc.cpp | 24 ++++++++++++------------
1 file changed, 12 insertions(+), 12 deletions(-)
diff --git a/src/pairwise_qc.cpp b/src/pairwise_qc.cpp
index 4aca9e07..b776cdf8 100644
--- a/src/pairwise_qc.cpp
+++ b/src/pairwise_qc.cpp
@@ -4,7 +4,7 @@ using namespace Rcpp;
// this function outputs four pieces:
// 1. N nonzero mat; this is the number of nonzero cells for each gene-NT gRNA pair (same regardless of control group)
-// 2. n_nonzero_tot; this is the vector giving the number of nonzero cells per gene; when using the NT cells as the complement, we restrict our attention to the NT cells; when using the complement set as the control group, by contrast, we sum over all cells.
+// 2. n_nonzero_tot; this is the vector giving the number of nonzero cells per gene; when using the NT cells as the control group, we restrict our attention to the NT cells; when using the complement set as the control group, by contrast, we sum over all cells.
// 3,4. n nonzero trt, n nonzero cntrl vectors
// [[Rcpp::export]]
List compute_nt_nonzero_matrix_and_n_ok_pairs_v3(IntegerVector j, IntegerVector p, int n_cells_orig, int n_cells_sub,
@@ -25,42 +25,42 @@ List compute_nt_nonzero_matrix_and_n_ok_pairs_v3(IntegerVector j, IntegerVector
for (int i = 0; i < cells_in_use_zero_idx.size(); i ++) cells_in_use_zero_idx[i] = cells_in_use[i] - 1;
// 1. iterate over genes
- for (int column_idx = 0; column_idx < n_genes; column_idx++) {
+ for (int row_idx = 0; row_idx < n_genes; row_idx++) {
// load nonzero positions into the boolean vector y_sub
- load_nonzero_posits(j, p, column_idx, y_orig, y_sub, cells_in_use_zero_idx);
- // 1.2 iterate over nt grnas, adding n nonzero trt to M matrix
+ load_nonzero_posits(j, p, row_idx, y_orig, y_sub, cells_in_use_zero_idx);
+ // iterate over nt grnas, adding n nonzero trt to M matrix
for (int grna_idx = 0; grna_idx < n_nt_grnas; grna_idx ++) {
n_nonzero = 0;
curr_idxs = indiv_nt_grna_idxs[grna_idx];
for (int k = 0; k < curr_idxs.size(); k ++) {
if (!control_group_complement) { // NT cells control group
if (y_sub[all_nt_idxs[curr_idxs[k] - 1] - 1]) n_nonzero ++; // redirection
- } else { // discovery cells control group
+ } else { // complement set control group
if (y_sub[curr_idxs[k] - 1]) n_nonzero ++; // no redirection
}
}
- M(grna_idx, column_idx) = n_nonzero;
+ M(grna_idx, row_idx) = n_nonzero;
}
// 1.2 update n_nonzero_tot for this gene
- n_nonzero_tot[column_idx] = 0;
+ n_nonzero_tot[row_idx] = 0;
if (!control_group_complement) {
- for (int k = 0; k < n_nt_grnas; k ++) n_nonzero_tot[column_idx] += M(k, column_idx);
+ for (int k = 0; k < n_nt_grnas; k ++) n_nonzero_tot[row_idx] += M(k, row_idx);
} else {
- for (int k = 0; k < y_sub.size(); k++) if (y_sub[k]) n_nonzero_tot[column_idx] ++;
+ for (int k = 0; k < y_sub.size(); k++) if (y_sub[k]) n_nonzero_tot[row_idx] ++;
}
// iterate through the discovery pairs containing this gene
- while (to_analyze_response_idxs[pair_pointer] - 1 == column_idx) {
+ while (to_analyze_response_idxs[pair_pointer] - 1 == row_idx) {
curr_idxs = grna_group_idxs[to_analyze_grna_idxs[pair_pointer] - 1];
curr_n_nonzero_trt = 0;
// first, get n nonzero trt
for (int k = 0; k < curr_idxs.size(); k ++) if (y_sub[curr_idxs[k] - 1]) curr_n_nonzero_trt ++;
// second, get n nonzero cntrl
if (!control_group_complement) { // NT cells
- curr_n_nonzero_cntrl = n_nonzero_tot[column_idx];
+ curr_n_nonzero_cntrl = n_nonzero_tot[row_idx];
} else { // complement set
- curr_n_nonzero_cntrl = n_nonzero_tot[column_idx] - curr_n_nonzero_trt;
+ curr_n_nonzero_cntrl = n_nonzero_tot[row_idx] - curr_n_nonzero_trt;
}
n_nonzero_trt[pair_pointer] = curr_n_nonzero_trt;
n_nonzero_cntrl[pair_pointer] = curr_n_nonzero_cntrl;
From 89d7555832d41aa4abfdc0a1061d0c7ce6f60fdc Mon Sep 17 00:00:00 2001
From: Tim Barry
Date: Fri, 8 Dec 2023 13:37:56 -0500
Subject: [PATCH 9/9] fix bug: mt- and MT- are now valid prefixes for
mitochondrial genes
---
R/preprocessing_functions.R | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/R/preprocessing_functions.R b/R/preprocessing_functions.R
index 775ed6c9..51c36b5f 100644
--- a/R/preprocessing_functions.R
+++ b/R/preprocessing_functions.R
@@ -101,7 +101,7 @@ compute_cell_covariates <- function(matrix_in, feature_names, compute_p_mito) {
matrix_in <- set_matrix_accessibility(matrix_in, make_row_accessible = FALSE)
# get MT gene idxs
if (compute_p_mito) {
- mt_gene_idxs <- grep(pattern = "^MT-", x = feature_names)
+ mt_gene_idxs <- grep(pattern = "^MT-", x = feature_names, ignore.case = TRUE)
compute_p_mito <- length(mt_gene_idxs) >= 1
} else {
mt_gene_idxs <- integer()