diff --git a/R/differential_expression.R b/R/differential_expression.R index 4e5f871..97ae99f 100644 --- a/R/differential_expression.R +++ b/R/differential_expression.R @@ -279,15 +279,17 @@ diffex_fdr <- function(term_data) { #' @returns a grob #' #' @examples +#' library(dplyr) #' regression_significance <- diffex_mzroll( -#' nplug_mzroll_normalized, -#' "normalized_log2_abundance", -#' "limitation + limitation:DR + 0", -#' FDR_cutoff = 0.01, -#' feature_labels = c("UDP", "Lactate", "Serine") -#' ) -#' -#' plot_volcano(regression_significance, 10, 0.1) +#' nplug_mzroll_normalized, +#' "normalized_log2_abundance", +#' "limitation + limitation:DR + 0") %>% +#' dplyr::left_join( +#' nplug_mzroll_normalized$features %>% select(groupId, compoundName), +#' by = "groupId") +#' +#' plot_volcano(regression_significance, 10, 0.1, c("Ribose-P", "acetyl-CoA", "ATP")) +#' #' @export plot_volcano <- function( regression_significance, @@ -316,11 +318,12 @@ plot_volcano <- function( is_discovery = qvalue < FDR_cutoff ) %>% ggplot(aes_string(x = effect_var)) + - {if ("compoundName" %in% colnames(regression_significance)) - {geom_point(aes(y = p.value.trans, color = is_discovery, name = compoundName))} - else {geom_point(aes(y = p.value.trans, color = is_discovery))} - } + - geom_text(aes(label = ifelse(compoundName %in% feature_labels, compoundName, ""), y = p.value.trans, vjust = -0.75)) + + {if ("compoundName" %in% colnames(regression_significance)) { + geom_point(aes(y = p.value.trans, color = is_discovery, name = compoundName)) + + geom_text(aes(label = ifelse(compoundName %in% feature_labels, compoundName, ""), y = p.value.trans, vjust = -0.75)) + } + else {geom_point(aes(y = p.value.trans, color = is_discovery))} + } + facet_wrap(~term, scales = "free_x") + scale_x_continuous("Effect size") + scale_y_continuous(expression(-log[10] ~ "pvalue")) + diff --git a/R/mutate_mzroll_list.R b/R/mutate_mzroll_list.R index 6dea3eb..fe71e5a 100644 --- a/R/mutate_mzroll_list.R +++ b/R/mutate_mzroll_list.R @@ -66,6 +66,15 @@ floor_peaks <- function(mzroll_list, #'@return triple omic data with imputed missing peaks #' #' @examples +#' library(dplyr) +#' +#' lod_values <- nplug_mzroll_augmented[["measurements"]] %>% +#' dplyr::select(groupId, log2_abundance) %>% +#' dplyr::distinct(groupId, .keep_all = TRUE) +#' +#' mzroll_list <- nplug_mzroll_augmented +#' mzroll_list$measurements <- mzroll_list$measurements %>% filter(groupId != 2 & sampleId != 1) +#' #' mzroll_list_imputed <- impute_missing_peaks(mzroll_list, lod_values, "log2_abundance", 0.15) #' #'@export @@ -76,6 +85,18 @@ impute_missing_peaks <- function(mzroll_list, test_mzroll_list(mzroll_list) + stopifnot(colnames(lod_values) %in% c("groupId", rlang::sym(quant_var))) + + if (nrow(lod_values) > nrow(lod_values %>% dplyr::distinct(groupId, .keep_all = TRUE))) { + stop("only one value per feature must be provided to impute missing peaks") + } + + features <- mzroll_list$features %>% dplyr::select(groupId) + + if(!all(features$groupId %in% lod_values$groupId) | nrow(features) != nrow(lod_values)) { + stop("groupId values of lod_value table and feature table of triple omic data must match") + } + valid_quant_var <- setdiff( mzroll_list$design$measurements$variable, c(mzroll_list$design$feature_pk, mzroll_list$design$sample_pk) @@ -94,12 +115,12 @@ impute_missing_peaks <- function(mzroll_list, ) ## impute missing peaks - missing_peaks_imputed <- left_join( + missing_peaks_imputed <- dplyr::left_join( missing_peaks, lod_values, by = c("groupId")) %>% - rowwise() %>% - mutate(!!rlang::sym(quant_var) := rnorm(1, mean = !!rlang::sym(quant_var)+1, sd = imputation_sd)) + dplyr::rowwise() %>% + dplyr::mutate(!!rlang::sym(quant_var) := stats::rnorm(1, mean = !!rlang::sym(quant_var)+1, sd = imputation_sd)) ## merge measured peaks with imputed peaks completed_peaks <- dplyr::bind_rows( @@ -125,6 +146,15 @@ impute_missing_peaks <- function(mzroll_list, #'@return triple omic data with imputed missing peaks #' #' @examples +#' library(dplyr) +#' +#' lod_values <- nplug_mzroll_augmented[["measurements"]] %>% +#' dplyr::select(groupId, log2_abundance) %>% +#' dplyr::distinct(groupId, .keep_all = TRUE) +#' +#' mzroll_list <- nplug_mzroll_augmented +#' mzroll_list$measurements <- mzroll_list$measurements %>% filter(groupId != 2 & sampleId != 1) +#' #' mzroll_list_imputed <- fill_in_missing_peaks(mzroll_list, lod_values, "log2_abundance", 0.15) #' mzroll_list_imputed <- fill_in_missing_peaks(mzroll_list, 12, "log2_abundance") #' @@ -134,8 +164,7 @@ fill_in_missing_peaks <- function(mzroll_list, quant_var = "log2_abundance", imputation_sd = 0.15) { if (is.data.frame(fill_values)) { - stopifnot(colnames(fill_values) %in% c("groupId", rlang::sym(quant_var))) - output <- impute_missing_peaks(mzroll_list, lod_values, quant_var, imputation_sd) + output <- impute_missing_peaks(mzroll_list, fill_values, quant_var, imputation_sd) } else if (is.numeric(fill_values)) { output <- floor_peaks(mzroll_list, fill_values, quant_var) } else { diff --git a/man/fill_in_missing_peaks.Rd b/man/fill_in_missing_peaks.Rd index dee2c94..8c1e1ce 100644 --- a/man/fill_in_missing_peaks.Rd +++ b/man/fill_in_missing_peaks.Rd @@ -28,6 +28,15 @@ If \code{fill_values} is a data frame, this function calls \code{impute_missing_ If it is a numeric vector, 'this function calls \code{floor_peaks()}. Other types are not currently supported } \examples{ +library(dplyr) + +lod_values <- nplug_mzroll_augmented[["measurements"]] \%>\% +dplyr::select(groupId, log2_abundance) \%>\% +dplyr::distinct(groupId, .keep_all = TRUE) + +mzroll_list <- nplug_mzroll_augmented +mzroll_list$measurements <- mzroll_list$measurements \%>\% filter(groupId != 2 & sampleId != 1) + mzroll_list_imputed <- fill_in_missing_peaks(mzroll_list, lod_values, "log2_abundance", 0.15) mzroll_list_imputed <- fill_in_missing_peaks(mzroll_list, 12, "log2_abundance") diff --git a/man/impute_missing_peaks.Rd b/man/impute_missing_peaks.Rd index c3e9c3a..a735c6b 100644 --- a/man/impute_missing_peaks.Rd +++ b/man/impute_missing_peaks.Rd @@ -27,6 +27,15 @@ triple omic data with imputed missing peaks Impute missing peaks with provided feature-level imputation values } \examples{ +library(dplyr) + +lod_values <- nplug_mzroll_augmented[["measurements"]] \%>\% +dplyr::select(groupId, log2_abundance) \%>\% +dplyr::distinct(groupId, .keep_all = TRUE) + +mzroll_list <- nplug_mzroll_augmented +mzroll_list$measurements <- mzroll_list$measurements \%>\% filter(groupId != 2 & sampleId != 1) + mzroll_list_imputed <- impute_missing_peaks(mzroll_list, lod_values, "log2_abundance", 0.15) } diff --git a/man/plot_volcano.Rd b/man/plot_volcano.Rd index 862854b..df6fba4 100644 --- a/man/plot_volcano.Rd +++ b/man/plot_volcano.Rd @@ -28,13 +28,15 @@ a grob Volcano plot } \examples{ +library(dplyr) regression_significance <- diffex_mzroll( - nplug_mzroll_normalized, - "normalized_log2_abundance", - "limitation + limitation:DR + 0", - FDR_cutoff = 0.01, - feature_labels = c("UDP", "Lactate", "Serine") -) +nplug_mzroll_normalized, +"normalized_log2_abundance", +"limitation + limitation:DR + 0") \%>\% +dplyr::left_join( +nplug_mzroll_normalized$features \%>\% select(groupId, compoundName), +by = "groupId") + +plot_volcano(regression_significance, 10, 0.1, c("Ribose-P", "acetyl-CoA", "ATP")) -plot_volcano(regression_significance, 10, 0.1) } diff --git a/vignettes/NPLUG.Rmd b/vignettes/NPLUG.Rmd index 1dc7a87..566f800 100644 --- a/vignettes/NPLUG.Rmd +++ b/vignettes/NPLUG.Rmd @@ -287,7 +287,7 @@ Two visualizations that are particularly useful for EDA are heatmaps (I'm sure y ```{r static_eda, fig.height = 8, fig.width = 8} samples_with_pcs <- final_processed_data %>% - romic::add_pca_loadings(value_var = "normalized_log2_abundance", npcs = 5) + romic::add_pcs(value_var = "normalized_log2_abundance", npcs = 5) romic::plot_bivariate( samples_with_pcs$samples,