From 281eba2b00588f05e928787c7ef969f1ff2a7bee Mon Sep 17 00:00:00 2001 From: delfarahalireza Date: Mon, 6 Nov 2023 11:51:57 -0800 Subject: [PATCH 1/7] issue-21 updated floor_peaks() function --- R/mutate_mzroll_list.R | 105 ++++++++++++++++++++++++++++------------- 1 file changed, 73 insertions(+), 32 deletions(-) diff --git a/R/mutate_mzroll_list.R b/R/mutate_mzroll_list.R index 0e1bb02..51a87a7 100644 --- a/R/mutate_mzroll_list.R +++ b/R/mutate_mzroll_list.R @@ -4,55 +4,96 @@ #' undetected peaks. #' #' @inheritParams test_mzroll_list -#' @param log2_floor_value minimum value to set for low abundance or -#' missing peaks +#' @param floor_value minimum value to set for low abundance or missing peaks #' @param floor_var measurement variable to floor to \code{log2_floor_value}. +#' @param mzrolldb_file_path (optional) path to mzrollDB file, only used for QQQ datasets. +#' @param data_type parameter to differentiate data types. #' #' @return \code{\link{process_mzroll}} #' #' @examples #' floored_peaks <- floor_peaks(nplug_mzroll_augmented, 12) +#' floored_peaks <- floor_peaks(mzroll_list, 100, "log2_abundance", "QQQ") + #' @export floor_peaks <- function(mzroll_list, - log2_floor_value = 12, - floor_var = "log2_abundance") { - test_mzroll_list(mzroll_list) - + floor_value = 12, + floor_var = "log2_abundance", + mzrolldb_file_path = NULL, + data_type = "QE") { + test_mzroll_list_local(mzroll_list) + valid_floor_var <- setdiff( mzroll_list$design$measurements$variable, c(mzroll_list$design$feature_pk, mzroll_list$design$sample_pk) ) - + checkmate::assertChoice(floor_var, valid_floor_var) checkmate::assertNumeric(mzroll_list$measurements[[floor_var]]) - checkmate::assertNumber(log2_floor_value) - - # summarize peaks associated with each peakgroup - - missing_peaks <- tidyr::expand_grid( - groupId = mzroll_list$features$groupId, - sampleId = mzroll_list$samples$sampleId - ) %>% - dplyr::anti_join( + checkmate::assertNumber(floor_value) + + if (data_type == "QQQ") { + ## find missing peaks + missing_peaks <- tidyr::expand_grid( + groupId = mzroll_list$features$groupId, + sampleId = mzroll_list$samples$sampleId) %>% + dplyr::anti_join ( + mzroll_list$measurements, + by = c("groupId", "sampleId") + ) + + ## find groupBackground for missing peaks + ## assign 100 if the groupBackground is zero + group_background <- PDB_import(mzrolldb_file_path) %>% + group_by(groupId) %>% + distinct(groupId, .keep_all = TRUE) %>% + mutate(groupBackground = dplyr::case_when( + groupBackground < floor_value ~ floor_value, + TRUE ~ groupBackground)) %>% + mutate(log2_abundance = log2(groupBackground)) %>% + mutate(groupId = factor(groupId)) %>% + select(groupId, log2_abundance) + + ## impute missing peaks + missing_peaks_imputed <- left_join( + missing_peaks, + group_background, + by = c("groupId")) %>% + rowwise() %>% + mutate(!!rlang::sym(imputation_var) := rnorm(1, mean = !!rlang::sym(imputation_var)+1, sd = 0.1)) + + ## merge measured peaks with imputed peaks + completed_peaks <- dplyr::bind_rows( mzroll_list$measurements, - by = c("groupId", "sampleId") + missing_peaks_imputed + ) + } + else { + missing_peaks <- tidyr::expand_grid( + groupId = mzroll_list$features$groupId, + sampleId = mzroll_list$samples$sampleId + ) %>% + dplyr::anti_join( + mzroll_list$measurements, + by = c("groupId", "sampleId") ) %>% - tibble::as_tibble() %>% - dplyr::mutate(!!rlang::sym(floor_var) := log2_floor_value) - - # combine detected peaks with peaks that were missing for some samples - completed_peaks <- dplyr::bind_rows( - mzroll_list$measurements %>% - dplyr::mutate(!!rlang::sym(floor_var) := dplyr::case_when( - is.na(!!rlang::sym(floor_var)) ~ log2_floor_value, - !!rlang::sym(floor_var) < log2_floor_value ~ log2_floor_value, - !!rlang::sym(floor_var) >= log2_floor_value ~ !!rlang::sym(floor_var) - )), - missing_peaks - ) - + tibble::as_tibble() %>% + dplyr::mutate(!!rlang::sym(floor_var) := floor_value) + + # combine detected peaks with peaks that were missing for some samples + completed_peaks <- dplyr::bind_rows( + mzroll_list$measurements %>% + dplyr::mutate(!!rlang::sym(floor_var) := dplyr::case_when( + is.na(!!rlang::sym(floor_var)) ~ floor_value, + !!rlang::sym(floor_var) < floor_value ~ floor_value, + !!rlang::sym(floor_var) >= floor_value ~ !!rlang::sym(floor_var) + )), + missing_peaks + ) + } + mzroll_list$measurements <- completed_peaks - + return(mzroll_list) } From f88b1568b6feeaa7804c27689ba0272453135d4e Mon Sep 17 00:00:00 2001 From: delfarahalireza Date: Thu, 16 Nov 2023 17:35:55 -0800 Subject: [PATCH 2/7] functions to fill in missing peaks --- NAMESPACE | 2 + R/mutate_mzroll_list.R | 180 +++++++++++++++++++++-------------- man/fill_in_missing_peaks.Rd | 33 +++++++ man/impute_missing_peaks.Rd | 32 +++++++ 4 files changed, 178 insertions(+), 69 deletions(-) create mode 100644 man/fill_in_missing_peaks.Rd create mode 100644 man/impute_missing_peaks.Rd diff --git a/NAMESPACE b/NAMESPACE index f66f484..7babc77 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,9 +3,11 @@ export(collapse_injections) export(collapse_metabolites) export(diffex_mzroll) +export(fill_in_missing_peaks) export(filter_groupIds) export(find_pathway_enrichments) export(floor_peaks) +export(impute_missing_peaks) export(is_has_label) export(lipid_components) export(merge_compounds_tbl) diff --git a/R/mutate_mzroll_list.R b/R/mutate_mzroll_list.R index 51a87a7..c755655 100644 --- a/R/mutate_mzroll_list.R +++ b/R/mutate_mzroll_list.R @@ -4,24 +4,19 @@ #' undetected peaks. #' #' @inheritParams test_mzroll_list -#' @param floor_value minimum value to set for low abundance or missing peaks +#' @param log2_floor_value minimum value to set for low abundance or +#' missing peaks #' @param floor_var measurement variable to floor to \code{log2_floor_value}. -#' @param mzrolldb_file_path (optional) path to mzrollDB file, only used for QQQ datasets. -#' @param data_type parameter to differentiate data types. #' #' @return \code{\link{process_mzroll}} #' #' @examples #' floored_peaks <- floor_peaks(nplug_mzroll_augmented, 12) -#' floored_peaks <- floor_peaks(mzroll_list, 100, "log2_abundance", "QQQ") - #' @export floor_peaks <- function(mzroll_list, - floor_value = 12, - floor_var = "log2_abundance", - mzrolldb_file_path = NULL, - data_type = "QE") { - test_mzroll_list_local(mzroll_list) + log2_floor_value = 12, + floor_var = "log2_abundance") { + test_mzroll_list(mzroll_list) valid_floor_var <- setdiff( mzroll_list$design$measurements$variable, @@ -30,73 +25,120 @@ floor_peaks <- function(mzroll_list, checkmate::assertChoice(floor_var, valid_floor_var) checkmate::assertNumeric(mzroll_list$measurements[[floor_var]]) - checkmate::assertNumber(floor_value) - - if (data_type == "QQQ") { - ## find missing peaks - missing_peaks <- tidyr::expand_grid( - groupId = mzroll_list$features$groupId, - sampleId = mzroll_list$samples$sampleId) %>% - dplyr::anti_join ( - mzroll_list$measurements, - by = c("groupId", "sampleId") - ) - - ## find groupBackground for missing peaks - ## assign 100 if the groupBackground is zero - group_background <- PDB_import(mzrolldb_file_path) %>% - group_by(groupId) %>% - distinct(groupId, .keep_all = TRUE) %>% - mutate(groupBackground = dplyr::case_when( - groupBackground < floor_value ~ floor_value, - TRUE ~ groupBackground)) %>% - mutate(log2_abundance = log2(groupBackground)) %>% - mutate(groupId = factor(groupId)) %>% - select(groupId, log2_abundance) - - ## impute missing peaks - missing_peaks_imputed <- left_join( - missing_peaks, - group_background, - by = c("groupId")) %>% - rowwise() %>% - mutate(!!rlang::sym(imputation_var) := rnorm(1, mean = !!rlang::sym(imputation_var)+1, sd = 0.1)) - - ## merge measured peaks with imputed peaks - completed_peaks <- dplyr::bind_rows( + checkmate::assertNumber(log2_floor_value) + + # summarize peaks associated with each peakgroup + + missing_peaks <- tidyr::expand_grid( + groupId = mzroll_list$features$groupId, + sampleId = mzroll_list$samples$sampleId + ) %>% + dplyr::anti_join( mzroll_list$measurements, - missing_peaks_imputed - ) - } - else { - missing_peaks <- tidyr::expand_grid( - groupId = mzroll_list$features$groupId, - sampleId = mzroll_list$samples$sampleId + by = c("groupId", "sampleId") ) %>% - dplyr::anti_join( - mzroll_list$measurements, - by = c("groupId", "sampleId") - ) %>% - tibble::as_tibble() %>% - dplyr::mutate(!!rlang::sym(floor_var) := floor_value) - - # combine detected peaks with peaks that were missing for some samples - completed_peaks <- dplyr::bind_rows( - mzroll_list$measurements %>% - dplyr::mutate(!!rlang::sym(floor_var) := dplyr::case_when( - is.na(!!rlang::sym(floor_var)) ~ floor_value, - !!rlang::sym(floor_var) < floor_value ~ floor_value, - !!rlang::sym(floor_var) >= floor_value ~ !!rlang::sym(floor_var) - )), - missing_peaks - ) - } + tibble::as_tibble() %>% + dplyr::mutate(!!rlang::sym(floor_var) := log2_floor_value) + + # combine detected peaks with peaks that were missing for some samples + completed_peaks <- dplyr::bind_rows( + mzroll_list$measurements %>% + dplyr::mutate(!!rlang::sym(floor_var) := dplyr::case_when( + is.na(!!rlang::sym(floor_var)) ~ log2_floor_value, + !!rlang::sym(floor_var) < log2_floor_value ~ log2_floor_value, + !!rlang::sym(floor_var) >= log2_floor_value ~ !!rlang::sym(floor_var) + )), + missing_peaks + ) mzroll_list$measurements <- completed_peaks return(mzroll_list) } +#' Impute missing peaks with provided feature-level imputation values +#' +#'@param mzroll_list: data in triple omic structure +#'@param lod_values: a tibble that maps groupId to log2 feature-level imputation values +#'@param quant_var: column to use for peak values +#'@param imputation_sd: standard deviation of Gaussian distribution to use for missing peak imputation +#' +#'@return triple omic data with imputed missing peaks +#' +#' @examples +#' mzroll_list_imputed <- impute_missing_peaks(mzroll_list, lod_values, "log2_abundance", 0.15) +#' +#'@export +impute_missing_peaks <- function(mzroll_list, + lod_values, + quant_var = "log2_abundance", + imputation_sd = 0.15) { + + test_mzroll_list(mzroll_list) + + valid_quant_var <- setdiff( + mzroll_list$design$measurements$variable, + c(mzroll_list$design$feature_pk, mzroll_list$design$sample_pk) + ) + + checkmate::assertChoice(quant_var, valid_quant_var) + checkmate::assertNumeric(mzroll_list$measurements[[quant_var]]) + + ## find missing peaks + missing_peaks <- tidyr::expand_grid( + groupId = mzroll_list$features$groupId, + sampleId = mzroll_list$samples$sampleId) %>% + dplyr::anti_join ( + mzroll_list$measurements, + by = c("groupId", "sampleId") + ) + + ## impute missing peaks + missing_peaks_imputed <- left_join( + missing_peaks, + group_background, + by = c("groupId")) %>% + rowwise() %>% + mutate(!!rlang::sym(quant_var) := rnorm(1, mean = !!rlang::sym(quant_var)+1, sd = imputation_sd)) + + ## merge measured peaks with imputed peaks + completed_peaks <- dplyr::bind_rows( + mzroll_list$measurements, + missing_peaks_imputed + ) + + mzroll_list$measurements <- completed_peaks + return(mzroll_list) +} + +#' Fill in missing peaks +#' +#'@param mzroll_list: data in triple omic structure +#'@param fill_values: either a numeric constant or a tibble that maps groupId to log2 feature-level imputation values +#'@param quant_var: column to use for peak values +#'@param imputation_sd: standard deviation of Gaussian distribution to use for missing peak imputation +#' +#'@return triple omic data with imputed missing peaks +#' +#' @examples +#' mzroll_list_imputed <- fill_in_missing_peaks(mzroll_list, lod_values, "log2_abundance", 0.15) +#' mzroll_list_imputed <- fill_in_missing_peaks(mzroll_list, 12, "log2_abundance") +#' +#'@export +fill_in_missing_peaks <- function(mzroll_list, + fill_values, + quant_var = "log2_abundance", + imputation_sd = 0.15) { + if (is.data.frame(fill_values)) { + stopifnot(colnames(fill_values) %in% c("groupId", rlang::sym(quant_var))) + output <- temp_impute(mzroll_list, lod_values, quant_var, imputation_sd) + } else if (is.numeric(fill_values)) { + output <- floor_peaks(mzroll_list, fill_values, quant_var) + } else { + stop("fill_values should either be a single numeric value or a tibble that maps groupId to feature-level imputation values") + } + return(output) +} #' Normalize Peaks #' diff --git a/man/fill_in_missing_peaks.Rd b/man/fill_in_missing_peaks.Rd new file mode 100644 index 0000000..4acc41f --- /dev/null +++ b/man/fill_in_missing_peaks.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mutate_mzroll_list.R +\name{fill_in_missing_peaks} +\alias{fill_in_missing_peaks} +\title{Fill in missing peaks} +\usage{ +fill_in_missing_peaks( + mzroll_list, + fill_values, + quant_var = "log2_abundance", + imputation_sd = 0.15 +) +} +\arguments{ +\item{mzroll_list:}{data in triple omic structure} + +\item{fill_values:}{either a numeric constant or a tibble that maps groupId to log2 feature-level imputation values} + +\item{quant_var:}{column to use for peak values} + +\item{imputation_sd:}{standard deviation of Gaussian distribution to use for missing peak imputation} +} +\value{ +triple omic data with imputed missing peaks +} +\description{ +Fill in missing peaks +} +\examples{ +mzroll_list_imputed <- fill_in_missing_peaks(mzroll_list, lod_values, "log2_abundance", 0.15) +mzroll_list_imputed <- fill_in_missing_peaks(mzroll_list, 12, "log2_abundance") + +} diff --git a/man/impute_missing_peaks.Rd b/man/impute_missing_peaks.Rd new file mode 100644 index 0000000..c3e9c3a --- /dev/null +++ b/man/impute_missing_peaks.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mutate_mzroll_list.R +\name{impute_missing_peaks} +\alias{impute_missing_peaks} +\title{Impute missing peaks with provided feature-level imputation values} +\usage{ +impute_missing_peaks( + mzroll_list, + lod_values, + quant_var = "log2_abundance", + imputation_sd = 0.15 +) +} +\arguments{ +\item{mzroll_list:}{data in triple omic structure} + +\item{lod_values:}{a tibble that maps groupId to log2 feature-level imputation values} + +\item{quant_var:}{column to use for peak values} + +\item{imputation_sd:}{standard deviation of Gaussian distribution to use for missing peak imputation} +} +\value{ +triple omic data with imputed missing peaks +} +\description{ +Impute missing peaks with provided feature-level imputation values +} +\examples{ +mzroll_list_imputed <- impute_missing_peaks(mzroll_list, lod_values, "log2_abundance", 0.15) + +} From 6e8d2b679ec1418abe497decd59bded8ee3589ae Mon Sep 17 00:00:00 2001 From: delfarahalireza Date: Thu, 16 Nov 2023 17:41:54 -0800 Subject: [PATCH 3/7] fixing an error in the new function --- R/mutate_mzroll_list.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/mutate_mzroll_list.R b/R/mutate_mzroll_list.R index c755655..7c21cca 100644 --- a/R/mutate_mzroll_list.R +++ b/R/mutate_mzroll_list.R @@ -131,7 +131,7 @@ fill_in_missing_peaks <- function(mzroll_list, imputation_sd = 0.15) { if (is.data.frame(fill_values)) { stopifnot(colnames(fill_values) %in% c("groupId", rlang::sym(quant_var))) - output <- temp_impute(mzroll_list, lod_values, quant_var, imputation_sd) + output <- impute_missing_peaks(mzroll_list, lod_values, quant_var, imputation_sd) } else if (is.numeric(fill_values)) { output <- floor_peaks(mzroll_list, fill_values, quant_var) } else { From 3f74b800eb8124b4d682001111b908b8a1bd5c82 Mon Sep 17 00:00:00 2001 From: delfarahalireza Date: Thu, 16 Nov 2023 21:55:16 -0800 Subject: [PATCH 4/7] replacing groupBackground with lod_values --- R/mutate_mzroll_list.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/mutate_mzroll_list.R b/R/mutate_mzroll_list.R index 7c21cca..ddd4b8b 100644 --- a/R/mutate_mzroll_list.R +++ b/R/mutate_mzroll_list.R @@ -96,7 +96,7 @@ impute_missing_peaks <- function(mzroll_list, ## impute missing peaks missing_peaks_imputed <- left_join( missing_peaks, - group_background, + lod_values, by = c("groupId")) %>% rowwise() %>% mutate(!!rlang::sym(quant_var) := rnorm(1, mean = !!rlang::sym(quant_var)+1, sd = imputation_sd)) From 2ad8e99b1f30565f2b16e70e0063b962ef0e75c3 Mon Sep 17 00:00:00 2001 From: delfarahalireza Date: Fri, 17 Nov 2023 10:16:59 -0800 Subject: [PATCH 5/7] adding description to new function --- R/mutate_mzroll_list.R | 4 ++++ man/fill_in_missing_peaks.Rd | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/R/mutate_mzroll_list.R b/R/mutate_mzroll_list.R index ddd4b8b..6dea3eb 100644 --- a/R/mutate_mzroll_list.R +++ b/R/mutate_mzroll_list.R @@ -113,6 +113,10 @@ impute_missing_peaks <- function(mzroll_list, #' Fill in missing peaks #' +#'@description +#'If \code{fill_values} is a data frame, this function calls \code{impute_missing_peaks()}. +#'If it is a numeric vector, 'this function calls \code{floor_peaks()}. Other types are not currently supported +#' #'@param mzroll_list: data in triple omic structure #'@param fill_values: either a numeric constant or a tibble that maps groupId to log2 feature-level imputation values #'@param quant_var: column to use for peak values diff --git a/man/fill_in_missing_peaks.Rd b/man/fill_in_missing_peaks.Rd index 4acc41f..dee2c94 100644 --- a/man/fill_in_missing_peaks.Rd +++ b/man/fill_in_missing_peaks.Rd @@ -24,7 +24,8 @@ fill_in_missing_peaks( triple omic data with imputed missing peaks } \description{ -Fill in missing peaks +If \code{fill_values} is a data frame, this function calls \code{impute_missing_peaks()}. +If it is a numeric vector, 'this function calls \code{floor_peaks()}. Other types are not currently supported } \examples{ mzroll_list_imputed <- fill_in_missing_peaks(mzroll_list, lod_values, "log2_abundance", 0.15) From abe5470cb0246a9b65962629f7b1012e346f34f4 Mon Sep 17 00:00:00 2001 From: delfarahalireza Date: Tue, 21 Nov 2023 15:28:26 -0800 Subject: [PATCH 6/7] issue-21 adding validations for new functions --- R/differential_expression.R | 29 +++++++++++++++------------ R/mutate_mzroll_list.R | 39 +++++++++++++++++++++++++++++++----- man/fill_in_missing_peaks.Rd | 9 +++++++++ man/impute_missing_peaks.Rd | 9 +++++++++ man/plot_volcano.Rd | 16 ++++++++------- vignettes/NPLUG.Rmd | 2 +- 6 files changed, 78 insertions(+), 26 deletions(-) diff --git a/R/differential_expression.R b/R/differential_expression.R index 4e5f871..97ae99f 100644 --- a/R/differential_expression.R +++ b/R/differential_expression.R @@ -279,15 +279,17 @@ diffex_fdr <- function(term_data) { #' @returns a grob #' #' @examples +#' library(dplyr) #' regression_significance <- diffex_mzroll( -#' nplug_mzroll_normalized, -#' "normalized_log2_abundance", -#' "limitation + limitation:DR + 0", -#' FDR_cutoff = 0.01, -#' feature_labels = c("UDP", "Lactate", "Serine") -#' ) -#' -#' plot_volcano(regression_significance, 10, 0.1) +#' nplug_mzroll_normalized, +#' "normalized_log2_abundance", +#' "limitation + limitation:DR + 0") %>% +#' dplyr::left_join( +#' nplug_mzroll_normalized$features %>% select(groupId, compoundName), +#' by = "groupId") +#' +#' plot_volcano(regression_significance, 10, 0.1, c("Ribose-P", "acetyl-CoA", "ATP")) +#' #' @export plot_volcano <- function( regression_significance, @@ -316,11 +318,12 @@ plot_volcano <- function( is_discovery = qvalue < FDR_cutoff ) %>% ggplot(aes_string(x = effect_var)) + - {if ("compoundName" %in% colnames(regression_significance)) - {geom_point(aes(y = p.value.trans, color = is_discovery, name = compoundName))} - else {geom_point(aes(y = p.value.trans, color = is_discovery))} - } + - geom_text(aes(label = ifelse(compoundName %in% feature_labels, compoundName, ""), y = p.value.trans, vjust = -0.75)) + + {if ("compoundName" %in% colnames(regression_significance)) { + geom_point(aes(y = p.value.trans, color = is_discovery, name = compoundName)) + + geom_text(aes(label = ifelse(compoundName %in% feature_labels, compoundName, ""), y = p.value.trans, vjust = -0.75)) + } + else {geom_point(aes(y = p.value.trans, color = is_discovery))} + } + facet_wrap(~term, scales = "free_x") + scale_x_continuous("Effect size") + scale_y_continuous(expression(-log[10] ~ "pvalue")) + diff --git a/R/mutate_mzroll_list.R b/R/mutate_mzroll_list.R index 6dea3eb..fe71e5a 100644 --- a/R/mutate_mzroll_list.R +++ b/R/mutate_mzroll_list.R @@ -66,6 +66,15 @@ floor_peaks <- function(mzroll_list, #'@return triple omic data with imputed missing peaks #' #' @examples +#' library(dplyr) +#' +#' lod_values <- nplug_mzroll_augmented[["measurements"]] %>% +#' dplyr::select(groupId, log2_abundance) %>% +#' dplyr::distinct(groupId, .keep_all = TRUE) +#' +#' mzroll_list <- nplug_mzroll_augmented +#' mzroll_list$measurements <- mzroll_list$measurements %>% filter(groupId != 2 & sampleId != 1) +#' #' mzroll_list_imputed <- impute_missing_peaks(mzroll_list, lod_values, "log2_abundance", 0.15) #' #'@export @@ -76,6 +85,18 @@ impute_missing_peaks <- function(mzroll_list, test_mzroll_list(mzroll_list) + stopifnot(colnames(lod_values) %in% c("groupId", rlang::sym(quant_var))) + + if (nrow(lod_values) > nrow(lod_values %>% dplyr::distinct(groupId, .keep_all = TRUE))) { + stop("only one value per feature must be provided to impute missing peaks") + } + + features <- mzroll_list$features %>% dplyr::select(groupId) + + if(!all(features$groupId %in% lod_values$groupId) | nrow(features) != nrow(lod_values)) { + stop("groupId values of lod_value table and feature table of triple omic data must match") + } + valid_quant_var <- setdiff( mzroll_list$design$measurements$variable, c(mzroll_list$design$feature_pk, mzroll_list$design$sample_pk) @@ -94,12 +115,12 @@ impute_missing_peaks <- function(mzroll_list, ) ## impute missing peaks - missing_peaks_imputed <- left_join( + missing_peaks_imputed <- dplyr::left_join( missing_peaks, lod_values, by = c("groupId")) %>% - rowwise() %>% - mutate(!!rlang::sym(quant_var) := rnorm(1, mean = !!rlang::sym(quant_var)+1, sd = imputation_sd)) + dplyr::rowwise() %>% + dplyr::mutate(!!rlang::sym(quant_var) := stats::rnorm(1, mean = !!rlang::sym(quant_var)+1, sd = imputation_sd)) ## merge measured peaks with imputed peaks completed_peaks <- dplyr::bind_rows( @@ -125,6 +146,15 @@ impute_missing_peaks <- function(mzroll_list, #'@return triple omic data with imputed missing peaks #' #' @examples +#' library(dplyr) +#' +#' lod_values <- nplug_mzroll_augmented[["measurements"]] %>% +#' dplyr::select(groupId, log2_abundance) %>% +#' dplyr::distinct(groupId, .keep_all = TRUE) +#' +#' mzroll_list <- nplug_mzroll_augmented +#' mzroll_list$measurements <- mzroll_list$measurements %>% filter(groupId != 2 & sampleId != 1) +#' #' mzroll_list_imputed <- fill_in_missing_peaks(mzroll_list, lod_values, "log2_abundance", 0.15) #' mzroll_list_imputed <- fill_in_missing_peaks(mzroll_list, 12, "log2_abundance") #' @@ -134,8 +164,7 @@ fill_in_missing_peaks <- function(mzroll_list, quant_var = "log2_abundance", imputation_sd = 0.15) { if (is.data.frame(fill_values)) { - stopifnot(colnames(fill_values) %in% c("groupId", rlang::sym(quant_var))) - output <- impute_missing_peaks(mzroll_list, lod_values, quant_var, imputation_sd) + output <- impute_missing_peaks(mzroll_list, fill_values, quant_var, imputation_sd) } else if (is.numeric(fill_values)) { output <- floor_peaks(mzroll_list, fill_values, quant_var) } else { diff --git a/man/fill_in_missing_peaks.Rd b/man/fill_in_missing_peaks.Rd index dee2c94..8c1e1ce 100644 --- a/man/fill_in_missing_peaks.Rd +++ b/man/fill_in_missing_peaks.Rd @@ -28,6 +28,15 @@ If \code{fill_values} is a data frame, this function calls \code{impute_missing_ If it is a numeric vector, 'this function calls \code{floor_peaks()}. Other types are not currently supported } \examples{ +library(dplyr) + +lod_values <- nplug_mzroll_augmented[["measurements"]] \%>\% +dplyr::select(groupId, log2_abundance) \%>\% +dplyr::distinct(groupId, .keep_all = TRUE) + +mzroll_list <- nplug_mzroll_augmented +mzroll_list$measurements <- mzroll_list$measurements \%>\% filter(groupId != 2 & sampleId != 1) + mzroll_list_imputed <- fill_in_missing_peaks(mzroll_list, lod_values, "log2_abundance", 0.15) mzroll_list_imputed <- fill_in_missing_peaks(mzroll_list, 12, "log2_abundance") diff --git a/man/impute_missing_peaks.Rd b/man/impute_missing_peaks.Rd index c3e9c3a..a735c6b 100644 --- a/man/impute_missing_peaks.Rd +++ b/man/impute_missing_peaks.Rd @@ -27,6 +27,15 @@ triple omic data with imputed missing peaks Impute missing peaks with provided feature-level imputation values } \examples{ +library(dplyr) + +lod_values <- nplug_mzroll_augmented[["measurements"]] \%>\% +dplyr::select(groupId, log2_abundance) \%>\% +dplyr::distinct(groupId, .keep_all = TRUE) + +mzroll_list <- nplug_mzroll_augmented +mzroll_list$measurements <- mzroll_list$measurements \%>\% filter(groupId != 2 & sampleId != 1) + mzroll_list_imputed <- impute_missing_peaks(mzroll_list, lod_values, "log2_abundance", 0.15) } diff --git a/man/plot_volcano.Rd b/man/plot_volcano.Rd index 862854b..df6fba4 100644 --- a/man/plot_volcano.Rd +++ b/man/plot_volcano.Rd @@ -28,13 +28,15 @@ a grob Volcano plot } \examples{ +library(dplyr) regression_significance <- diffex_mzroll( - nplug_mzroll_normalized, - "normalized_log2_abundance", - "limitation + limitation:DR + 0", - FDR_cutoff = 0.01, - feature_labels = c("UDP", "Lactate", "Serine") -) +nplug_mzroll_normalized, +"normalized_log2_abundance", +"limitation + limitation:DR + 0") \%>\% +dplyr::left_join( +nplug_mzroll_normalized$features \%>\% select(groupId, compoundName), +by = "groupId") + +plot_volcano(regression_significance, 10, 0.1, c("Ribose-P", "acetyl-CoA", "ATP")) -plot_volcano(regression_significance, 10, 0.1) } diff --git a/vignettes/NPLUG.Rmd b/vignettes/NPLUG.Rmd index 1dc7a87..566f800 100644 --- a/vignettes/NPLUG.Rmd +++ b/vignettes/NPLUG.Rmd @@ -287,7 +287,7 @@ Two visualizations that are particularly useful for EDA are heatmaps (I'm sure y ```{r static_eda, fig.height = 8, fig.width = 8} samples_with_pcs <- final_processed_data %>% - romic::add_pca_loadings(value_var = "normalized_log2_abundance", npcs = 5) + romic::add_pcs(value_var = "normalized_log2_abundance", npcs = 5) romic::plot_bivariate( samples_with_pcs$samples, From 41ffe7736cca9b169058ca89cda2977d1817ff1d Mon Sep 17 00:00:00 2001 From: delfarahalireza Date: Tue, 21 Nov 2023 15:45:42 -0800 Subject: [PATCH 7/7] fixing validation check in impute_missing_peaks() --- R/mutate_mzroll_list.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/mutate_mzroll_list.R b/R/mutate_mzroll_list.R index fe71e5a..a066244 100644 --- a/R/mutate_mzroll_list.R +++ b/R/mutate_mzroll_list.R @@ -93,7 +93,7 @@ impute_missing_peaks <- function(mzroll_list, features <- mzroll_list$features %>% dplyr::select(groupId) - if(!all(features$groupId %in% lod_values$groupId) | nrow(features) != nrow(lod_values)) { + if(!all(features$groupId %in% lod_values$groupId)) { stop("groupId values of lod_value table and feature table of triple omic data must match") }