From 5661eca4e63d9b9ab27724d07543e102ccc61817 Mon Sep 17 00:00:00 2001 From: Johanna Fleischman <71276903+johanna0321@users.noreply.github.com> Date: Tue, 14 May 2024 10:38:12 -0700 Subject: [PATCH 1/9] Add zero centering and lm drift normalization functions --- DESCRIPTION | 2 +- R/mutate_mzroll_list.R | 98 +++++++++++++++++++++++++++++++++- man/calicomics.Rd | 17 ++++++ man/normalize_peaks.Rd | 18 +++++++ man/nplug_compounds.Rd | 2 +- man/nplug_mzroll_augmented.Rd | 2 +- man/nplug_mzroll_normalized.Rd | 2 +- man/nplug_samples.Rd | 4 +- 8 files changed, 138 insertions(+), 7 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1ae5581..1aef0e8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -66,5 +66,5 @@ Suggests: dbplyr, testthat Encoding: UTF-8 LazyData: true -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 VignetteBuilder: knitr diff --git a/R/mutate_mzroll_list.R b/R/mutate_mzroll_list.R index a066244..6fb90f2 100644 --- a/R/mutate_mzroll_list.R +++ b/R/mutate_mzroll_list.R @@ -183,12 +183,16 @@ fill_in_missing_peaks <- function(mzroll_list, #' value #' } #' \item{\code{center batches}: batch centering} +#' \item{\code{center}: zero-center each compound} #' \item{\code{reference sample}: compare to reference sample} #' \item{\code{reference condition}: compare each sample to its specified #' reference sample} #' \item{\code{loess}: weighted smoothing of IC over time (adds #' \code{.loess_fit} as a peaks variable in addition to #' \code{norm_peak_varname})} +#' \item{\code{lm}: linear regression smoothing of IC over time (adds +#' \code{lm_estimate} as feature variable in addition to +#' \code{norm_peak_varname})} #' } #' @param quant_peak_varname variable in measurements to use for abundance #' @param norm_peak_varname variable in measurements to add for normalized @@ -269,7 +273,9 @@ normalize_peaks <- function(mzroll_list, "center batches", "normalize_peaks_batch_center", "reference sample", "normalize_peaks_reference_sample", "reference condition", "normalize_peaks_reference_condition", - "loess", "normalize_peaks_loess" + "loess", "normalize_peaks_loess", + "lm", "normalize_peaks_lm", + "center","normalize_peaks_center" ) checkmate::assertChoice( @@ -901,4 +907,94 @@ normalization_refloor <- function(normalized_peaks, } return(normalized_peaks) +} + +#' @inheritParams normalize_peaks_batch_center +#' @param time_col_varname variable in samples table to use for linear +#' correction +#' +#' @rdname normalize_peaks +normalize_peaks_lm <- function (mzroll_list, + quant_peak_varname, + norm_peak_varname, + time_col_varname) +{ + stopifnot(time_col_varname %in% colnames(mzroll_list$samples)) + + time_col_varname_add <- mzroll_list$samples %>% + dplyr::select(c("sampleId", time_col_varname)) + + lm_fit <- mzroll_list$measurements %>% + dplyr::left_join(., time_col_varname_add, by = "sampleId") %>% + tidyr::nest(groupData = -groupId) %>% + dplyr::mutate(lm_fits = purrr::map(groupData, + fit_lm, + quant_peak_varname = quant_peak_varname, + norm_peak_varname = norm_peak_varname, + time_col_varname = time_col_varname)) %>% + dplyr::select(-groupData) %>% + tidyr::unnest(lm_fits) + + lm_fit_measurements <- lm_fit %>% + dplyr::select(!!!rlang::syms(c(colnames(mzroll_list$measurements), norm_peak_varname))) + + lm_fit_features <- lm_fit %>% + dplyr::select(groupId, lm_estimate) %>% + unique() %>% + dplyr::left_join(mzroll_list$features, ., by = "groupId") + + mzroll_list <- romic::update_tomic(mzroll_list, lm_fit_measurements) + mzroll_list <- romic::update_tomic(mzroll_list, lm_fit_features) + + return(mzroll_list) + +} + +fit_lm <- function (groupData, + time_col_varname, + quant_peak_varname, + norm_peak_varname, + order = 1) +{ + + lm_data <- groupData %>% + dplyr::mutate(val_var = !!rlang::sym(quant_peak_varname), + dri_var = !!rlang::sym(time_col_varname)) + + lm_predict <- lm_data %>% + tidyr::drop_na(val_var) + + # Run model + lm_model <- stats::lm(val_var ~ poly(dri_var, degree = order), data = lm_predict) + + # Compute corrected values + lm_apply <- lm_data %>% + dplyr::mutate(median_value = median(.data$val_var, na.rm = T)) %>% + dplyr::mutate(`:=`(!!rlang::sym(norm_peak_varname), + .data$val_var - .env$predict(lm_model, newdata = lm_data) + .data$median_value)) %>% + dplyr::mutate(lm_estimate = summary(.env$lm_model)$coefficient[2,1]) %>% + dplyr::select(-c(val_var, dri_var, median_value)) + + return(lm_apply) + +} + + +#' @inheritParams normalize_peaks_center +#' +#' @rdname normalize_peaks +normalize_peaks_center <- function(mzroll_list, + quant_peak_varname, + norm_peak_varname) +{ + + updated_measurements <- mzroll_list$measurements %>% + dplyr::group_by(groupId) %>% + dplyr::mutate(!!rlang::sym(norm_peak_varname) := + scale(!!rlang::sym(quant_peak_varname), scale = F, center = T)) + + mzroll_list <- romic::update_tomic(mzroll_list, updated_measurements) + + return(mzroll_list) + } \ No newline at end of file diff --git a/man/calicomics.Rd b/man/calicomics.Rd index 229a27a..8b98604 100644 --- a/man/calicomics.Rd +++ b/man/calicomics.Rd @@ -2,6 +2,8 @@ % Please edit documentation in R/claman.R \docType{package} \name{calicomics} +\alias{claman} +\alias{claman-package} \alias{calicomics} \title{\code{claman} package} \description{ @@ -11,3 +13,18 @@ can then be modified through normalization, signal flooring, and filtering. Differential abundance analysis of metabolites can then be performed along with a variety of visualizations. } +\author{ +\strong{Maintainer}: Sean Hackett \email{sean@calicolabs.com} (\href{https://orcid.org/0000-0002-9553-4341}{ORCID}) + +Authors: +\itemize{ + \item Phil Seitzer \email{phillipseitzer@calicolabs.com} + \item Alireza Delfarah \email{delfarah@calicolabs.com} +} + +Other contributors: +\itemize{ + \item Calico Life Sciences LLC [copyright holder, funder] +} + +} diff --git a/man/normalize_peaks.Rd b/man/normalize_peaks.Rd index d5fb822..6d13c1b 100644 --- a/man/normalize_peaks.Rd +++ b/man/normalize_peaks.Rd @@ -8,6 +8,8 @@ \alias{normalize_peaks_reference_sample} \alias{normalize_peaks_reference_condition} \alias{normalize_peaks_loess} +\alias{normalize_peaks_lm} +\alias{normalize_peaks_center} \title{Normalize Peaks} \usage{ normalize_peaks( @@ -68,6 +70,15 @@ normalize_peaks_loess( weights_tribble = NULL, log2_floor_value = 12 ) + +normalize_peaks_lm( + mzroll_list, + quant_peak_varname, + norm_peak_varname, + time_col_varname +) + +normalize_peaks_center(mzroll_list, quant_peak_varname, norm_peak_varname) } \arguments{ \item{mzroll_list}{output of \link{process_mzroll} or @@ -87,12 +98,16 @@ normalize_peaks_loess( value } \item{\code{center batches}: batch centering} + \item{\code{center}: zero-center each compound} \item{\code{reference sample}: compare to reference sample} \item{\code{reference condition}: compare each sample to its specified reference sample} \item{\code{loess}: weighted smoothing of IC over time (adds \code{.loess_fit} as a peaks variable in addition to \code{norm_peak_varname})} + \item{\code{lm}: linear regression smoothing of IC over time (adds + \code{lm_estimate} as feature variable in addition to + \code{norm_peak_varname})} }} \item{quant_peak_varname}{variable in measurements to use for abundance} @@ -121,6 +136,9 @@ reference samples} \item{weights_tribble}{a table containing weights and sample variables to match them to.} + +\item{time_col_varname}{variable in samples table to use for linear +correction} } \value{ a \code{mzroll_list} as generated by \code{\link{process_mzroll}} diff --git a/man/nplug_compounds.Rd b/man/nplug_compounds.Rd index b1e7434..aad9a39 100644 --- a/man/nplug_compounds.Rd +++ b/man/nplug_compounds.Rd @@ -21,9 +21,9 @@ A table of meta-data for all NPLUG features with variables: } \seealso{ Other nplug: +\code{\link{nplug_mzroll}()}, \code{\link{nplug_mzroll_augmented}}, \code{\link{nplug_mzroll_normalized}}, -\code{\link{nplug_mzroll}()}, \code{\link{nplug_samples}} } \concept{nplug} diff --git a/man/nplug_mzroll_augmented.Rd b/man/nplug_mzroll_augmented.Rd index ab63202..e8b9f64 100644 --- a/man/nplug_mzroll_augmented.Rd +++ b/man/nplug_mzroll_augmented.Rd @@ -17,8 +17,8 @@ nplug_mzroll_augmented \seealso{ Other nplug: \code{\link{nplug_compounds}}, -\code{\link{nplug_mzroll_normalized}}, \code{\link{nplug_mzroll}()}, +\code{\link{nplug_mzroll_normalized}}, \code{\link{nplug_samples}} } \concept{nplug} diff --git a/man/nplug_mzroll_normalized.Rd b/man/nplug_mzroll_normalized.Rd index a2e32f5..574bea8 100644 --- a/man/nplug_mzroll_normalized.Rd +++ b/man/nplug_mzroll_normalized.Rd @@ -20,8 +20,8 @@ nplug_mzroll_normalized \seealso{ Other nplug: \code{\link{nplug_compounds}}, -\code{\link{nplug_mzroll_augmented}}, \code{\link{nplug_mzroll}()}, +\code{\link{nplug_mzroll_augmented}}, \code{\link{nplug_samples}} } \concept{nplug} diff --git a/man/nplug_samples.Rd b/man/nplug_samples.Rd index 620afc6..494ec45 100644 --- a/man/nplug_samples.Rd +++ b/man/nplug_samples.Rd @@ -39,9 +39,9 @@ A table of meta-data for all NPLUG samples with variables: \seealso{ Other nplug: \code{\link{nplug_compounds}}, +\code{\link{nplug_mzroll}()}, \code{\link{nplug_mzroll_augmented}}, -\code{\link{nplug_mzroll_normalized}}, -\code{\link{nplug_mzroll}()} +\code{\link{nplug_mzroll_normalized}} } \concept{nplug} \keyword{datasets} From 739137a1c99377c840181d4488070a398446d424 Mon Sep 17 00:00:00 2001 From: Johanna Fleischman <71276903+johanna0321@users.noreply.github.com> Date: Thu, 16 May 2024 14:01:31 -0700 Subject: [PATCH 2/9] Modify median polish function --- R/mutate_mzroll_list.R | 51 +++++++++++++++++++++++++++++++++--------- man/normalize_peaks.Rd | 4 ++++ 2 files changed, 44 insertions(+), 11 deletions(-) diff --git a/R/mutate_mzroll_list.R b/R/mutate_mzroll_list.R index 6fb90f2..adbdd1d 100644 --- a/R/mutate_mzroll_list.R +++ b/R/mutate_mzroll_list.R @@ -325,12 +325,16 @@ normalize_peaks <- function(mzroll_list, #' #' @inheritParams normalize_peaks #' @inheritParams floor_peaks +#' @param filter_values groupIds on which to calculate the median polish scaling +#' factor #' #' @rdname normalize_peaks normalize_peaks_median_polish <- function(mzroll_list, quant_peak_varname, norm_peak_varname, + filter_values = NULL, log2_floor_value = NA) { + stopifnot(length(log2_floor_value) == 1) if (!is.na(log2_floor_value)) { stopifnot(class(log2_floor_value) == "numeric") @@ -351,7 +355,27 @@ normalize_peaks_median_polish <- function(mzroll_list, ) } - sample_scaling_factors <- normalization_peaks %>% + if (!is.null(filter_values)){ + if (!any(filter_values %in% unique(mzroll_list$features$groupId))){ + + # If groupId filter values not in design, + # perform median polish on all compounds + warning("groupId filter values not found in design; performing median polish on all groupIds") + sample_scaling_factors <- normalization_peaks + } else { + + # If they are present, filter on these compounds + sample_scaling_factors <- normalization_peaks %>% + dplyr::filter(groupId %in% filter_values) + } + } else { + + # If not groupId filter values provided, + # calculate scaling factor on all compounds + sample_scaling_factors <- normalization_peaks + } + + sample_scaling_factors <- sample_scaling_factors %>% dplyr::group_by(groupId) %>% dplyr::mutate( median_abund = stats::median(!!rlang::sym(quant_peak_varname)) @@ -361,7 +385,7 @@ normalize_peaks_median_polish <- function(mzroll_list, diff_to_median = !!rlang::sym(quant_peak_varname) - median_abund ) %>% dplyr::group_by(sampleId) %>% - dplyr::summarize(scaling_factor = stats::median(diff_to_median)) + dplyr::summarize(median_polish_scaling_factor = stats::median(diff_to_median)) missing_sample_scaling_factors <- mzroll_list$samples %>% dplyr::anti_join(sample_scaling_factors, by = "sampleId") @@ -382,7 +406,7 @@ normalize_peaks_median_polish <- function(mzroll_list, dplyr::left_join(sample_scaling_factors, by = "sampleId") %>% dplyr::mutate( !!rlang::sym(norm_peak_varname) := - !!rlang::sym(quant_peak_varname) - scaling_factor + !!rlang::sym(quant_peak_varname) - median_polish_scaling_factor ) %>% # measurements starting at limit of detection are reset to # log2_floor_value @@ -393,7 +417,7 @@ normalize_peaks_median_polish <- function(mzroll_list, ) %>% dplyr::mutate(!!rlang::sym(norm_peak_varname) := log2_floor_value) ) %>% - dplyr::select(-scaling_factor) %>% + dplyr::select(-median_polish_scaling_factor) %>% # measurements pushed below limit of detection are reset to # log2_floor_value dplyr::mutate( @@ -405,13 +429,18 @@ normalize_peaks_median_polish <- function(mzroll_list, dplyr::left_join(sample_scaling_factors, by = "sampleId") %>% dplyr::mutate( !!rlang::sym(norm_peak_varname) := - !!rlang::sym(quant_peak_varname) - scaling_factor + !!rlang::sym(quant_peak_varname) - median_polish_scaling_factor ) %>% - dplyr::select(-scaling_factor) + dplyr::select(-median_polish_scaling_factor) } mzroll_list <- romic::update_tomic(mzroll_list, updated_measurements) + updated_samples <- mzroll_list$samples %>% + dplyr::left_join(., sample_scaling_factors, by = "sampleId") + + mzroll_list <- romic::update_tomic(mzroll_list, updated_samples) + return(mzroll_list) } @@ -980,12 +1009,12 @@ fit_lm <- function (groupData, } -#' @inheritParams normalize_peaks_center +#' @inheritParams normalize_peaks_batch_center #' #' @rdname normalize_peaks -normalize_peaks_center <- function(mzroll_list, - quant_peak_varname, - norm_peak_varname) +normalize_peaks_center <- function (mzroll_list, + quant_peak_varname, + norm_peak_varname) { updated_measurements <- mzroll_list$measurements %>% @@ -997,4 +1026,4 @@ normalize_peaks_center <- function(mzroll_list, return(mzroll_list) -} \ No newline at end of file +} diff --git a/man/normalize_peaks.Rd b/man/normalize_peaks.Rd index 6d13c1b..614039b 100644 --- a/man/normalize_peaks.Rd +++ b/man/normalize_peaks.Rd @@ -24,6 +24,7 @@ normalize_peaks_median_polish( mzroll_list, quant_peak_varname, norm_peak_varname, + filter_values = NULL, log2_floor_value = NA ) @@ -117,6 +118,9 @@ abundance} \item{...}{additional arguments to pass to normalization methods} +\item{filter_values}{groupIds on which to calculate the median polish scaling +factor} + \item{log2_floor_value}{minimum value to set for low abundance or missing peaks} From 6186c8a0a261c2ed81dc336e7c70ff7bb0faebec Mon Sep 17 00:00:00 2001 From: Johanna Fleischman <71276903+johanna0321@users.noreply.github.com> Date: Thu, 16 May 2024 14:05:17 -0700 Subject: [PATCH 3/9] Add romic parameter to test_mzroll_list --- R/utils.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/utils.R b/R/utils.R index c3f02b3..c6aba85 100644 --- a/R/utils.R +++ b/R/utils.R @@ -2,6 +2,8 @@ #' #' @param mzroll_list output of \link{process_mzroll} or #' \link{process_mzroll_multi} +#' @param fast_check if `TRUE` then skip some checks which are slow and that are +#' generally only needed when a `tomic` object is first created #' #' \itemize{ #' \item{features: one row per unique analyte (defined by a @@ -10,8 +12,6 @@ #' \item{measurements: one row per peak (samples x peakgroups)} #' } #' -#' @inheritParams romic:::check_triple_omic -#' test_mzroll_list <- function(mzroll_list, fast_check = TRUE) { checkmate::assertClass(mzroll_list, "tomic") checkmate::assertClass(mzroll_list, "mzroll") From 5ef645ee235c5183df9844bb33855d3bfaf5d337 Mon Sep 17 00:00:00 2001 From: Johanna Fleischman <71276903+johanna0321@users.noreply.github.com> Date: Thu, 16 May 2024 14:10:33 -0700 Subject: [PATCH 4/9] Update documentation and comments --- R/mutate_mzroll_list.R | 5 +++++ man/collapse_injections.Rd | 9 +-------- man/collapse_metabolites.Rd | 9 +-------- man/diffex_mzroll.Rd | 9 +-------- man/filter_groupIds.Rd | 9 +-------- man/find_pathway_enrichments.Rd | 9 +-------- man/floor_peaks.Rd | 9 +-------- man/merge_compounds_tbl.Rd | 9 +-------- man/merge_samples_tbl.Rd | 9 +-------- man/normalize_peaks.Rd | 9 +-------- man/plot_barplot.Rd | 9 +-------- man/plot_compare_injection.Rd | 9 +-------- man/test_mzroll_list.Rd | 5 ++++- 13 files changed, 20 insertions(+), 89 deletions(-) diff --git a/R/mutate_mzroll_list.R b/R/mutate_mzroll_list.R index adbdd1d..4a57223 100644 --- a/R/mutate_mzroll_list.R +++ b/R/mutate_mzroll_list.R @@ -999,6 +999,11 @@ fit_lm <- function (groupData, # Compute corrected values lm_apply <- lm_data %>% dplyr::mutate(median_value = median(.data$val_var, na.rm = T)) %>% + + # Assign new normalized variable norm_peak_varname: + # Predict values from linear fit are subtracted from quant_peak_varname, + # which centers the data around zero + # Median value from quant_peak_varname is added back to maintain abundance value dplyr::mutate(`:=`(!!rlang::sym(norm_peak_varname), .data$val_var - .env$predict(lm_model, newdata = lm_data) + .data$median_value)) %>% dplyr::mutate(lm_estimate = summary(.env$lm_model)$coefficient[2,1]) %>% diff --git a/man/collapse_injections.Rd b/man/collapse_injections.Rd index 1c48ccd..ea245c7 100644 --- a/man/collapse_injections.Rd +++ b/man/collapse_injections.Rd @@ -13,14 +13,7 @@ collapse_injections( } \arguments{ \item{mzroll_list}{output of \link{process_mzroll} or - \link{process_mzroll_multi} - -\itemize{ - \item{features: one row per unique analyte (defined by a - unique groupId)}, - \item{samples: one row per unique sample (defined by a unique sampleId)}, - \item{measurements: one row per peak (samples x peakgroups)} - }} +\link{process_mzroll_multi}} \item{grouping_vars}{character vector of sample variables to use for grouping} diff --git a/man/collapse_metabolites.Rd b/man/collapse_metabolites.Rd index 3244c44..c068fee 100644 --- a/man/collapse_metabolites.Rd +++ b/man/collapse_metabolites.Rd @@ -12,14 +12,7 @@ collapse_metabolites( } \arguments{ \item{mzroll_list}{output of \link{process_mzroll} or - \link{process_mzroll_multi} - -\itemize{ - \item{features: one row per unique analyte (defined by a - unique groupId)}, - \item{samples: one row per unique sample (defined by a unique sampleId)}, - \item{measurements: one row per peak (samples x peakgroups)} - }} +\link{process_mzroll_multi}} \item{preserve_distinct_methods}{if TRUE then collapse metabolites for each method separately. If FALSE, collapse over methods.} diff --git a/man/diffex_mzroll.Rd b/man/diffex_mzroll.Rd index 89319c2..3dd9da2 100644 --- a/man/diffex_mzroll.Rd +++ b/man/diffex_mzroll.Rd @@ -15,14 +15,7 @@ diffex_mzroll( } \arguments{ \item{mzroll_list}{output of \link{process_mzroll} or - \link{process_mzroll_multi} - -\itemize{ - \item{features: one row per unique analyte (defined by a - unique groupId)}, - \item{samples: one row per unique sample (defined by a unique sampleId)}, - \item{measurements: one row per peak (samples x peakgroups)} - }} +\link{process_mzroll_multi}} \item{value_var}{measurement variable} diff --git a/man/filter_groupIds.Rd b/man/filter_groupIds.Rd index 2f17237..8d5810b 100644 --- a/man/filter_groupIds.Rd +++ b/man/filter_groupIds.Rd @@ -8,14 +8,7 @@ filter_groupIds(mzroll_list, groupIds, invert = FALSE) } \arguments{ \item{mzroll_list}{output of \link{process_mzroll} or - \link{process_mzroll_multi} - -\itemize{ - \item{features: one row per unique analyte (defined by a - unique groupId)}, - \item{samples: one row per unique sample (defined by a unique sampleId)}, - \item{measurements: one row per peak (samples x peakgroups)} - }} +\link{process_mzroll_multi}} \item{groupIds}{groupIds to retain} diff --git a/man/find_pathway_enrichments.Rd b/man/find_pathway_enrichments.Rd index c24dd1c..fc7876d 100644 --- a/man/find_pathway_enrichments.Rd +++ b/man/find_pathway_enrichments.Rd @@ -17,14 +17,7 @@ find_pathway_enrichments( } \arguments{ \item{mzroll_list}{output of \link{process_mzroll} or - \link{process_mzroll_multi} - -\itemize{ - \item{features: one row per unique analyte (defined by a - unique groupId)}, - \item{samples: one row per unique sample (defined by a unique sampleId)}, - \item{measurements: one row per peak (samples x peakgroups)} - }} +\link{process_mzroll_multi}} \item{regression_significance}{returned by \code{\link{diffex_mzroll}}; a tibble of tests performed.} diff --git a/man/floor_peaks.Rd b/man/floor_peaks.Rd index 04babcf..ba1a2a4 100644 --- a/man/floor_peaks.Rd +++ b/man/floor_peaks.Rd @@ -8,14 +8,7 @@ floor_peaks(mzroll_list, log2_floor_value = 12, floor_var = "log2_abundance") } \arguments{ \item{mzroll_list}{output of \link{process_mzroll} or - \link{process_mzroll_multi} - -\itemize{ - \item{features: one row per unique analyte (defined by a - unique groupId)}, - \item{samples: one row per unique sample (defined by a unique sampleId)}, - \item{measurements: one row per peak (samples x peakgroups)} - }} +\link{process_mzroll_multi}} \item{log2_floor_value}{minimum value to set for low abundance or missing peaks} diff --git a/man/merge_compounds_tbl.Rd b/man/merge_compounds_tbl.Rd index 1586e58..b6f3c97 100644 --- a/man/merge_compounds_tbl.Rd +++ b/man/merge_compounds_tbl.Rd @@ -8,14 +8,7 @@ merge_compounds_tbl(mzroll_list, compounds_tbl, by = "compoundName") } \arguments{ \item{mzroll_list}{output of \link{process_mzroll} or - \link{process_mzroll_multi} - -\itemize{ - \item{features: one row per unique analyte (defined by a - unique groupId)}, - \item{samples: one row per unique sample (defined by a unique sampleId)}, - \item{measurements: one row per peak (samples x peakgroups)} - }} +\link{process_mzroll_multi}} \item{compounds_tbl}{Table of compound metadata} diff --git a/man/merge_samples_tbl.Rd b/man/merge_samples_tbl.Rd index fccc699..0338a30 100644 --- a/man/merge_samples_tbl.Rd +++ b/man/merge_samples_tbl.Rd @@ -8,14 +8,7 @@ merge_samples_tbl(mzroll_list, samples_tbl, id_strings, exact = FALSE) } \arguments{ \item{mzroll_list}{output of \link{process_mzroll} or - \link{process_mzroll_multi} - -\itemize{ - \item{features: one row per unique analyte (defined by a - unique groupId)}, - \item{samples: one row per unique sample (defined by a unique sampleId)}, - \item{measurements: one row per peak (samples x peakgroups)} - }} +\link{process_mzroll_multi}} \item{samples_tbl}{Table of sample metadata} diff --git a/man/normalize_peaks.Rd b/man/normalize_peaks.Rd index 614039b..1c569bf 100644 --- a/man/normalize_peaks.Rd +++ b/man/normalize_peaks.Rd @@ -83,14 +83,7 @@ normalize_peaks_center(mzroll_list, quant_peak_varname, norm_peak_varname) } \arguments{ \item{mzroll_list}{output of \link{process_mzroll} or - \link{process_mzroll_multi} - -\itemize{ - \item{features: one row per unique analyte (defined by a - unique groupId)}, - \item{samples: one row per unique sample (defined by a unique sampleId)}, - \item{measurements: one row per peak (samples x peakgroups)} - }} +\link{process_mzroll_multi}} \item{normalization_method}{Normalization method to apply \itemize{ diff --git a/man/plot_barplot.Rd b/man/plot_barplot.Rd index cd96a86..2c1c30c 100644 --- a/man/plot_barplot.Rd +++ b/man/plot_barplot.Rd @@ -8,14 +8,7 @@ plot_barplot(mzroll_list, groupIds, grouping_vars, value_var, fill_var = NULL) } \arguments{ \item{mzroll_list}{output of \link{process_mzroll} or - \link{process_mzroll_multi} - -\itemize{ - \item{features: one row per unique analyte (defined by a - unique groupId)}, - \item{samples: one row per unique sample (defined by a unique sampleId)}, - \item{measurements: one row per peak (samples x peakgroups)} - }} +\link{process_mzroll_multi}} \item{groupIds}{groupIds for compounds to plot} diff --git a/man/plot_compare_injection.Rd b/man/plot_compare_injection.Rd index a09aafe..4b78fa8 100644 --- a/man/plot_compare_injection.Rd +++ b/man/plot_compare_injection.Rd @@ -12,14 +12,7 @@ plot_compare_injection( } \arguments{ \item{mzroll_list}{output of \link{process_mzroll} or - \link{process_mzroll_multi} - -\itemize{ - \item{features: one row per unique analyte (defined by a - unique groupId)}, - \item{samples: one row per unique sample (defined by a unique sampleId)}, - \item{measurements: one row per peak (samples x peakgroups)} - }} +\link{process_mzroll_multi}} \item{grouping_vars}{character vector of sample variables to use for grouping} diff --git a/man/test_mzroll_list.Rd b/man/test_mzroll_list.Rd index d630a5c..246fba6 100644 --- a/man/test_mzroll_list.Rd +++ b/man/test_mzroll_list.Rd @@ -8,7 +8,10 @@ test_mzroll_list(mzroll_list, fast_check = TRUE) } \arguments{ \item{mzroll_list}{output of \link{process_mzroll} or - \link{process_mzroll_multi} +\link{process_mzroll_multi}} + +\item{fast_check}{if `TRUE` then skip some checks which are slow and that are +generally only needed when a `tomic` object is first created \itemize{ \item{features: one row per unique analyte (defined by a From 859e9a760b3c455d3436517e6f324ce805887ba3 Mon Sep 17 00:00:00 2001 From: Johanna Fleischman <71276903+johanna0321@users.noreply.github.com> Date: Thu, 16 May 2024 14:44:03 -0700 Subject: [PATCH 5/9] Update function documentation --- R/mutate_mzroll_list.R | 16 ++++++++-------- man/fill_in_missing_peaks.Rd | 8 ++++---- man/impute_missing_peaks.Rd | 8 ++++---- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/R/mutate_mzroll_list.R b/R/mutate_mzroll_list.R index 4a57223..b4224ca 100644 --- a/R/mutate_mzroll_list.R +++ b/R/mutate_mzroll_list.R @@ -58,10 +58,10 @@ floor_peaks <- function(mzroll_list, #' Impute missing peaks with provided feature-level imputation values #' -#'@param mzroll_list: data in triple omic structure -#'@param lod_values: a tibble that maps groupId to log2 feature-level imputation values -#'@param quant_var: column to use for peak values -#'@param imputation_sd: standard deviation of Gaussian distribution to use for missing peak imputation +#'@param mzroll_list data in triple omic structure +#'@param lod_values a tibble that maps groupId to log2 feature-level imputation values +#'@param quant_var column to use for peak values +#'@param imputation_sd standard deviation of Gaussian distribution to use for missing peak imputation #' #'@return triple omic data with imputed missing peaks #' @@ -138,10 +138,10 @@ impute_missing_peaks <- function(mzroll_list, #'If \code{fill_values} is a data frame, this function calls \code{impute_missing_peaks()}. #'If it is a numeric vector, 'this function calls \code{floor_peaks()}. Other types are not currently supported #' -#'@param mzroll_list: data in triple omic structure -#'@param fill_values: either a numeric constant or a tibble that maps groupId to log2 feature-level imputation values -#'@param quant_var: column to use for peak values -#'@param imputation_sd: standard deviation of Gaussian distribution to use for missing peak imputation +#'@param mzroll_list data in triple omic structure +#'@param fill_values either a numeric constant or a tibble that maps groupId to log2 feature-level imputation values +#'@param quant_var column to use for peak values +#'@param imputation_sd standard deviation of Gaussian distribution to use for missing peak imputation #' #'@return triple omic data with imputed missing peaks #' diff --git a/man/fill_in_missing_peaks.Rd b/man/fill_in_missing_peaks.Rd index 8c1e1ce..ee4dae7 100644 --- a/man/fill_in_missing_peaks.Rd +++ b/man/fill_in_missing_peaks.Rd @@ -12,13 +12,13 @@ fill_in_missing_peaks( ) } \arguments{ -\item{mzroll_list:}{data in triple omic structure} +\item{mzroll_list}{data in triple omic structure} -\item{fill_values:}{either a numeric constant or a tibble that maps groupId to log2 feature-level imputation values} +\item{fill_values}{either a numeric constant or a tibble that maps groupId to log2 feature-level imputation values} -\item{quant_var:}{column to use for peak values} +\item{quant_var}{column to use for peak values} -\item{imputation_sd:}{standard deviation of Gaussian distribution to use for missing peak imputation} +\item{imputation_sd}{standard deviation of Gaussian distribution to use for missing peak imputation} } \value{ triple omic data with imputed missing peaks diff --git a/man/impute_missing_peaks.Rd b/man/impute_missing_peaks.Rd index a735c6b..74d6cac 100644 --- a/man/impute_missing_peaks.Rd +++ b/man/impute_missing_peaks.Rd @@ -12,13 +12,13 @@ impute_missing_peaks( ) } \arguments{ -\item{mzroll_list:}{data in triple omic structure} +\item{mzroll_list}{data in triple omic structure} -\item{lod_values:}{a tibble that maps groupId to log2 feature-level imputation values} +\item{lod_values}{a tibble that maps groupId to log2 feature-level imputation values} -\item{quant_var:}{column to use for peak values} +\item{quant_var}{column to use for peak values} -\item{imputation_sd:}{standard deviation of Gaussian distribution to use for missing peak imputation} +\item{imputation_sd}{standard deviation of Gaussian distribution to use for missing peak imputation} } \value{ triple omic data with imputed missing peaks From 21ccf0381c7449fb0890533aedd2ca42cda11ef6 Mon Sep 17 00:00:00 2001 From: Johanna Fleischman <71276903+johanna0321@users.noreply.github.com> Date: Thu, 16 May 2024 16:02:07 -0700 Subject: [PATCH 6/9] Update median polish documentation --- R/mutate_mzroll_list.R | 70 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 69 insertions(+), 1 deletion(-) diff --git a/R/mutate_mzroll_list.R b/R/mutate_mzroll_list.R index b4224ca..c2236ae 100644 --- a/R/mutate_mzroll_list.R +++ b/R/mutate_mzroll_list.R @@ -178,7 +178,9 @@ fill_in_missing_peaks <- function(mzroll_list, #' @inheritParams test_mzroll_list #' @param normalization_method Normalization method to apply #' \itemize{ -#' \item{\code{median polish}: column normalization based on average signal} +#' \item{\code{median polish}: column normalization based on average signal +#' (adds \code{median_polish_scaling_factor} as feature variable in addition to +#' \code{norm_peak_varname})} #' \item{\code{loading value}: column normalization using a sample-level #' value #' } @@ -442,6 +444,72 @@ normalize_peaks_median_polish <- function(mzroll_list, mzroll_list <- romic::update_tomic(mzroll_list, updated_samples) return(mzroll_list) + +} + + +#' Predict Dilutions from Median Polish Scaling Factor +#' +#' Using `median_polish_scaling_factor` output from `normalize_peaks_median_polish`, +#' predict sample-wise dilutions +#' +#' @details This function performs an inverse-normalization on the scaling factors, +#' and scales the samples based on the observed maximum scaling factor. Assumes that +#' median polish is perform on log2 transformed data +#' +#' @param mzroll_list data in triple omic structure +#' @param scaling_factor scaling factor, defaults to `median_polish_scaling_factor` +#' and must be a `samples` column in `mzroll_list` +#' @param norm_scale_varname variable in samples to add for dilution predictions +#' @param group_var optional grouping variable on which to calculate maximum dilution +#' +#' @return a \code{mzroll_list} with \code{norm_scale_varname} variable added to +#' samples +#' +#' @export +median_polish_predict_dilutions <- function (mzroll_list, + scaling_factor = "median_polish_scaling_factor", + norm_scale_varname = "median_polish_predicted_dilutions", + group_vars = NULL) { + test_mzroll_list(mzroll_list) + + checkmate::assertString(scaling_factor) + if (!(scaling_factor %in% colnames(mzroll_list$samples))) { + stop( + "\"scaling_factor\":", + scaling_factor, + ", not present in samples table" + ) + } + checkmate::assertString(norm_scale_varname) + + updated_samples <- mzroll_list$samples %>% + dplyr::mutate(temp_scaling_factor = !!rlang::sym(scaling_factor)) %>% + dplyr::mutate(inverse_log_scaling_factor = 2^temp_scaling_factor) + + if(!is.null(group_var) && any(group_vars %in% colnames(mzroll_list$samples))) { + + updated_samples <- updated_samples %>% + dplyr::group_by(group_vars) %>% + dplyr::mutate(m = max(inverse_log_scaling_factor)) %>% + dplyr::ungroup() + + } else { + + updated_samples <- updated_samples %>% + dplyr::mutate(m = max(inverse_log_scaling_factor)) + + } + + updated_samples <- updated_samples %>% + dplyr::rowwise() %>% + dplyr::mutate(`:=`(!!rlang::sym(norm_scale_varname), + inverse_log_scaling_factor/m)) %>% + dplyr::select(-temp_scaling_factor, -inverse_log_scaling_factor, -m) + + mzroll_list <- romic::update_tomic(mzroll_list, updated_samples) + return(mzroll_list) + } #' Normalize Peaks - Loading Value From 3c805e532947355d783010b2ff5b05f92f6228a5 Mon Sep 17 00:00:00 2001 From: Johanna Fleischman <71276903+johanna0321@users.noreply.github.com> Date: Fri, 17 May 2024 12:35:20 -0700 Subject: [PATCH 7/9] Update documentation and functions --- NAMESPACE | 1 + R/mutate_mzroll_list.R | 25 +++++++++-------- man/median_polish_predict_dilutions.Rd | 37 ++++++++++++++++++++++++++ man/normalize_peaks.Rd | 4 ++- 4 files changed, 55 insertions(+), 12 deletions(-) create mode 100644 man/median_polish_predict_dilutions.Rd diff --git a/NAMESPACE b/NAMESPACE index 7babc77..2b64fb5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -10,6 +10,7 @@ export(floor_peaks) export(impute_missing_peaks) export(is_has_label) export(lipid_components) +export(median_polish_predict_dilutions) export(merge_compounds_tbl) export(merge_samples_tbl) export(normalize_peaks) diff --git a/R/mutate_mzroll_list.R b/R/mutate_mzroll_list.R index c2236ae..ef33fdf 100644 --- a/R/mutate_mzroll_list.R +++ b/R/mutate_mzroll_list.R @@ -453,7 +453,7 @@ normalize_peaks_median_polish <- function(mzroll_list, #' Using `median_polish_scaling_factor` output from `normalize_peaks_median_polish`, #' predict sample-wise dilutions #' -#' @details This function performs an inverse-normalization on the scaling factors, +#' @details This function performs an inverse-log transformation on the scaling factors, #' and scales the samples based on the observed maximum scaling factor. Assumes that #' median polish is perform on log2 transformed data #' @@ -461,16 +461,18 @@ normalize_peaks_median_polish <- function(mzroll_list, #' @param scaling_factor scaling factor, defaults to `median_polish_scaling_factor` #' and must be a `samples` column in `mzroll_list` #' @param norm_scale_varname variable in samples to add for dilution predictions -#' @param group_var optional grouping variable on which to calculate maximum dilution +#' @param group_var optional grouping variable on which to calculate maximum dilution, +#' must be a `samples` column in `mzroll_list` #' #' @return a \code{mzroll_list} with \code{norm_scale_varname} variable added to #' samples #' #' @export -median_polish_predict_dilutions <- function (mzroll_list, - scaling_factor = "median_polish_scaling_factor", - norm_scale_varname = "median_polish_predicted_dilutions", - group_vars = NULL) { +median_polish_predict_dilutions <- function(mzroll_list, + scaling_factor = "median_polish_scaling_factor", + norm_scale_varname = "median_polish_predicted_dilutions", + group_vars = NULL) { + test_mzroll_list(mzroll_list) checkmate::assertString(scaling_factor) @@ -487,21 +489,22 @@ median_polish_predict_dilutions <- function (mzroll_list, dplyr::mutate(temp_scaling_factor = !!rlang::sym(scaling_factor)) %>% dplyr::mutate(inverse_log_scaling_factor = 2^temp_scaling_factor) - if(!is.null(group_var) && any(group_vars %in% colnames(mzroll_list$samples))) { + if(!is.null(group_vars) && any(group_vars %in% colnames(mzroll_list$samples))) { updated_samples <- updated_samples %>% - dplyr::group_by(group_vars) %>% - dplyr::mutate(m = max(inverse_log_scaling_factor)) %>% - dplyr::ungroup() + dplyr::group_by_at(group_vars) %>% + dplyr::mutate(m = max(inverse_log_scaling_factor)) } else { + max_temp <- max(updated_samples$inverse_log_scaling_factor, na.rm = T) updated_samples <- updated_samples %>% - dplyr::mutate(m = max(inverse_log_scaling_factor)) + dplyr::mutate(m = .env$max_temp) } updated_samples <- updated_samples %>% + dplyr::ungroup() %>% dplyr::rowwise() %>% dplyr::mutate(`:=`(!!rlang::sym(norm_scale_varname), inverse_log_scaling_factor/m)) %>% diff --git a/man/median_polish_predict_dilutions.Rd b/man/median_polish_predict_dilutions.Rd new file mode 100644 index 0000000..d85edbe --- /dev/null +++ b/man/median_polish_predict_dilutions.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mutate_mzroll_list.R +\name{median_polish_predict_dilutions} +\alias{median_polish_predict_dilutions} +\title{Predict Dilutions from Median Polish Scaling Factor} +\usage{ +median_polish_predict_dilutions( + mzroll_list, + scaling_factor = "median_polish_scaling_factor", + norm_scale_varname = "median_polish_predicted_dilutions", + group_vars = NULL +) +} +\arguments{ +\item{mzroll_list}{data in triple omic structure} + +\item{scaling_factor}{scaling factor, defaults to `median_polish_scaling_factor` +and must be a `samples` column in `mzroll_list`} + +\item{norm_scale_varname}{variable in samples to add for dilution predictions} + +\item{group_var}{optional grouping variable on which to calculate maximum dilution, +must be a `samples` column in `mzroll_list`} +} +\value{ +a \code{mzroll_list} with \code{norm_scale_varname} variable added to +samples +} +\description{ +Using `median_polish_scaling_factor` output from `normalize_peaks_median_polish`, +predict sample-wise dilutions +} +\details{ +This function performs an inverse-log transformation on the scaling factors, +and scales the samples based on the observed maximum scaling factor. Assumes that +median polish is perform on log2 transformed data +} diff --git a/man/normalize_peaks.Rd b/man/normalize_peaks.Rd index 1c569bf..706efbf 100644 --- a/man/normalize_peaks.Rd +++ b/man/normalize_peaks.Rd @@ -87,7 +87,9 @@ normalize_peaks_center(mzroll_list, quant_peak_varname, norm_peak_varname) \item{normalization_method}{Normalization method to apply \itemize{ - \item{\code{median polish}: column normalization based on average signal} + \item{\code{median polish}: column normalization based on average signal + (adds \code{median_polish_scaling_factor} as feature variable in addition to + \code{norm_peak_varname})} \item{\code{loading value}: column normalization using a sample-level value } From e894a85c07441e0a57913392a78c7a23cc7ec8d2 Mon Sep 17 00:00:00 2001 From: Johanna Fleischman <71276903+johanna0321@users.noreply.github.com> Date: Fri, 17 May 2024 12:44:26 -0700 Subject: [PATCH 8/9] Fix group_var name and add ungroup --- R/mutate_mzroll_list.R | 9 +++++---- man/median_polish_predict_dilutions.Rd | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/R/mutate_mzroll_list.R b/R/mutate_mzroll_list.R index ef33fdf..9d66aa7 100644 --- a/R/mutate_mzroll_list.R +++ b/R/mutate_mzroll_list.R @@ -471,7 +471,7 @@ normalize_peaks_median_polish <- function(mzroll_list, median_polish_predict_dilutions <- function(mzroll_list, scaling_factor = "median_polish_scaling_factor", norm_scale_varname = "median_polish_predicted_dilutions", - group_vars = NULL) { + group_var = NULL) { test_mzroll_list(mzroll_list) @@ -489,10 +489,10 @@ median_polish_predict_dilutions <- function(mzroll_list, dplyr::mutate(temp_scaling_factor = !!rlang::sym(scaling_factor)) %>% dplyr::mutate(inverse_log_scaling_factor = 2^temp_scaling_factor) - if(!is.null(group_vars) && any(group_vars %in% colnames(mzroll_list$samples))) { + if(!is.null(group_var) && any(group_var %in% colnames(mzroll_list$samples))) { updated_samples <- updated_samples %>% - dplyr::group_by_at(group_vars) %>% + dplyr::group_by_at(group_var) %>% dplyr::mutate(m = max(inverse_log_scaling_factor)) } else { @@ -1096,7 +1096,8 @@ normalize_peaks_center <- function (mzroll_list, updated_measurements <- mzroll_list$measurements %>% dplyr::group_by(groupId) %>% dplyr::mutate(!!rlang::sym(norm_peak_varname) := - scale(!!rlang::sym(quant_peak_varname), scale = F, center = T)) + scale(!!rlang::sym(quant_peak_varname), scale = F, center = T)) %>% + dplyr::ungroup() mzroll_list <- romic::update_tomic(mzroll_list, updated_measurements) diff --git a/man/median_polish_predict_dilutions.Rd b/man/median_polish_predict_dilutions.Rd index d85edbe..7818ef5 100644 --- a/man/median_polish_predict_dilutions.Rd +++ b/man/median_polish_predict_dilutions.Rd @@ -8,7 +8,7 @@ median_polish_predict_dilutions( mzroll_list, scaling_factor = "median_polish_scaling_factor", norm_scale_varname = "median_polish_predicted_dilutions", - group_vars = NULL + group_var = NULL ) } \arguments{ From 56b69b46a54711ab649d48f7342be39387da8787 Mon Sep 17 00:00:00 2001 From: Johanna Fleischman <71276903+johanna0321@users.noreply.github.com> Date: Mon, 20 May 2024 09:24:01 -0700 Subject: [PATCH 9/9] Add description tag --- R/mutate_mzroll_list.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/mutate_mzroll_list.R b/R/mutate_mzroll_list.R index 9d66aa7..4b43e33 100644 --- a/R/mutate_mzroll_list.R +++ b/R/mutate_mzroll_list.R @@ -450,6 +450,7 @@ normalize_peaks_median_polish <- function(mzroll_list, #' Predict Dilutions from Median Polish Scaling Factor #' +#' @description #' Using `median_polish_scaling_factor` output from `normalize_peaks_median_polish`, #' predict sample-wise dilutions #'