From 5661eca4e63d9b9ab27724d07543e102ccc61817 Mon Sep 17 00:00:00 2001 From: Johanna Fleischman <71276903+johanna0321@users.noreply.github.com> Date: Tue, 14 May 2024 10:38:12 -0700 Subject: [PATCH] Add zero centering and lm drift normalization functions --- DESCRIPTION | 2 +- R/mutate_mzroll_list.R | 98 +++++++++++++++++++++++++++++++++- man/calicomics.Rd | 17 ++++++ man/normalize_peaks.Rd | 18 +++++++ man/nplug_compounds.Rd | 2 +- man/nplug_mzroll_augmented.Rd | 2 +- man/nplug_mzroll_normalized.Rd | 2 +- man/nplug_samples.Rd | 4 +- 8 files changed, 138 insertions(+), 7 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1ae5581..1aef0e8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -66,5 +66,5 @@ Suggests: dbplyr, testthat Encoding: UTF-8 LazyData: true -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 VignetteBuilder: knitr diff --git a/R/mutate_mzroll_list.R b/R/mutate_mzroll_list.R index a066244..6fb90f2 100644 --- a/R/mutate_mzroll_list.R +++ b/R/mutate_mzroll_list.R @@ -183,12 +183,16 @@ fill_in_missing_peaks <- function(mzroll_list, #' value #' } #' \item{\code{center batches}: batch centering} +#' \item{\code{center}: zero-center each compound} #' \item{\code{reference sample}: compare to reference sample} #' \item{\code{reference condition}: compare each sample to its specified #' reference sample} #' \item{\code{loess}: weighted smoothing of IC over time (adds #' \code{.loess_fit} as a peaks variable in addition to #' \code{norm_peak_varname})} +#' \item{\code{lm}: linear regression smoothing of IC over time (adds +#' \code{lm_estimate} as feature variable in addition to +#' \code{norm_peak_varname})} #' } #' @param quant_peak_varname variable in measurements to use for abundance #' @param norm_peak_varname variable in measurements to add for normalized @@ -269,7 +273,9 @@ normalize_peaks <- function(mzroll_list, "center batches", "normalize_peaks_batch_center", "reference sample", "normalize_peaks_reference_sample", "reference condition", "normalize_peaks_reference_condition", - "loess", "normalize_peaks_loess" + "loess", "normalize_peaks_loess", + "lm", "normalize_peaks_lm", + "center","normalize_peaks_center" ) checkmate::assertChoice( @@ -901,4 +907,94 @@ normalization_refloor <- function(normalized_peaks, } return(normalized_peaks) +} + +#' @inheritParams normalize_peaks_batch_center +#' @param time_col_varname variable in samples table to use for linear +#' correction +#' +#' @rdname normalize_peaks +normalize_peaks_lm <- function (mzroll_list, + quant_peak_varname, + norm_peak_varname, + time_col_varname) +{ + stopifnot(time_col_varname %in% colnames(mzroll_list$samples)) + + time_col_varname_add <- mzroll_list$samples %>% + dplyr::select(c("sampleId", time_col_varname)) + + lm_fit <- mzroll_list$measurements %>% + dplyr::left_join(., time_col_varname_add, by = "sampleId") %>% + tidyr::nest(groupData = -groupId) %>% + dplyr::mutate(lm_fits = purrr::map(groupData, + fit_lm, + quant_peak_varname = quant_peak_varname, + norm_peak_varname = norm_peak_varname, + time_col_varname = time_col_varname)) %>% + dplyr::select(-groupData) %>% + tidyr::unnest(lm_fits) + + lm_fit_measurements <- lm_fit %>% + dplyr::select(!!!rlang::syms(c(colnames(mzroll_list$measurements), norm_peak_varname))) + + lm_fit_features <- lm_fit %>% + dplyr::select(groupId, lm_estimate) %>% + unique() %>% + dplyr::left_join(mzroll_list$features, ., by = "groupId") + + mzroll_list <- romic::update_tomic(mzroll_list, lm_fit_measurements) + mzroll_list <- romic::update_tomic(mzroll_list, lm_fit_features) + + return(mzroll_list) + +} + +fit_lm <- function (groupData, + time_col_varname, + quant_peak_varname, + norm_peak_varname, + order = 1) +{ + + lm_data <- groupData %>% + dplyr::mutate(val_var = !!rlang::sym(quant_peak_varname), + dri_var = !!rlang::sym(time_col_varname)) + + lm_predict <- lm_data %>% + tidyr::drop_na(val_var) + + # Run model + lm_model <- stats::lm(val_var ~ poly(dri_var, degree = order), data = lm_predict) + + # Compute corrected values + lm_apply <- lm_data %>% + dplyr::mutate(median_value = median(.data$val_var, na.rm = T)) %>% + dplyr::mutate(`:=`(!!rlang::sym(norm_peak_varname), + .data$val_var - .env$predict(lm_model, newdata = lm_data) + .data$median_value)) %>% + dplyr::mutate(lm_estimate = summary(.env$lm_model)$coefficient[2,1]) %>% + dplyr::select(-c(val_var, dri_var, median_value)) + + return(lm_apply) + +} + + +#' @inheritParams normalize_peaks_center +#' +#' @rdname normalize_peaks +normalize_peaks_center <- function(mzroll_list, + quant_peak_varname, + norm_peak_varname) +{ + + updated_measurements <- mzroll_list$measurements %>% + dplyr::group_by(groupId) %>% + dplyr::mutate(!!rlang::sym(norm_peak_varname) := + scale(!!rlang::sym(quant_peak_varname), scale = F, center = T)) + + mzroll_list <- romic::update_tomic(mzroll_list, updated_measurements) + + return(mzroll_list) + } \ No newline at end of file diff --git a/man/calicomics.Rd b/man/calicomics.Rd index 229a27a..8b98604 100644 --- a/man/calicomics.Rd +++ b/man/calicomics.Rd @@ -2,6 +2,8 @@ % Please edit documentation in R/claman.R \docType{package} \name{calicomics} +\alias{claman} +\alias{claman-package} \alias{calicomics} \title{\code{claman} package} \description{ @@ -11,3 +13,18 @@ can then be modified through normalization, signal flooring, and filtering. Differential abundance analysis of metabolites can then be performed along with a variety of visualizations. } +\author{ +\strong{Maintainer}: Sean Hackett \email{sean@calicolabs.com} (\href{https://orcid.org/0000-0002-9553-4341}{ORCID}) + +Authors: +\itemize{ + \item Phil Seitzer \email{phillipseitzer@calicolabs.com} + \item Alireza Delfarah \email{delfarah@calicolabs.com} +} + +Other contributors: +\itemize{ + \item Calico Life Sciences LLC [copyright holder, funder] +} + +} diff --git a/man/normalize_peaks.Rd b/man/normalize_peaks.Rd index d5fb822..6d13c1b 100644 --- a/man/normalize_peaks.Rd +++ b/man/normalize_peaks.Rd @@ -8,6 +8,8 @@ \alias{normalize_peaks_reference_sample} \alias{normalize_peaks_reference_condition} \alias{normalize_peaks_loess} +\alias{normalize_peaks_lm} +\alias{normalize_peaks_center} \title{Normalize Peaks} \usage{ normalize_peaks( @@ -68,6 +70,15 @@ normalize_peaks_loess( weights_tribble = NULL, log2_floor_value = 12 ) + +normalize_peaks_lm( + mzroll_list, + quant_peak_varname, + norm_peak_varname, + time_col_varname +) + +normalize_peaks_center(mzroll_list, quant_peak_varname, norm_peak_varname) } \arguments{ \item{mzroll_list}{output of \link{process_mzroll} or @@ -87,12 +98,16 @@ normalize_peaks_loess( value } \item{\code{center batches}: batch centering} + \item{\code{center}: zero-center each compound} \item{\code{reference sample}: compare to reference sample} \item{\code{reference condition}: compare each sample to its specified reference sample} \item{\code{loess}: weighted smoothing of IC over time (adds \code{.loess_fit} as a peaks variable in addition to \code{norm_peak_varname})} + \item{\code{lm}: linear regression smoothing of IC over time (adds + \code{lm_estimate} as feature variable in addition to + \code{norm_peak_varname})} }} \item{quant_peak_varname}{variable in measurements to use for abundance} @@ -121,6 +136,9 @@ reference samples} \item{weights_tribble}{a table containing weights and sample variables to match them to.} + +\item{time_col_varname}{variable in samples table to use for linear +correction} } \value{ a \code{mzroll_list} as generated by \code{\link{process_mzroll}} diff --git a/man/nplug_compounds.Rd b/man/nplug_compounds.Rd index b1e7434..aad9a39 100644 --- a/man/nplug_compounds.Rd +++ b/man/nplug_compounds.Rd @@ -21,9 +21,9 @@ A table of meta-data for all NPLUG features with variables: } \seealso{ Other nplug: +\code{\link{nplug_mzroll}()}, \code{\link{nplug_mzroll_augmented}}, \code{\link{nplug_mzroll_normalized}}, -\code{\link{nplug_mzroll}()}, \code{\link{nplug_samples}} } \concept{nplug} diff --git a/man/nplug_mzroll_augmented.Rd b/man/nplug_mzroll_augmented.Rd index ab63202..e8b9f64 100644 --- a/man/nplug_mzroll_augmented.Rd +++ b/man/nplug_mzroll_augmented.Rd @@ -17,8 +17,8 @@ nplug_mzroll_augmented \seealso{ Other nplug: \code{\link{nplug_compounds}}, -\code{\link{nplug_mzroll_normalized}}, \code{\link{nplug_mzroll}()}, +\code{\link{nplug_mzroll_normalized}}, \code{\link{nplug_samples}} } \concept{nplug} diff --git a/man/nplug_mzroll_normalized.Rd b/man/nplug_mzroll_normalized.Rd index a2e32f5..574bea8 100644 --- a/man/nplug_mzroll_normalized.Rd +++ b/man/nplug_mzroll_normalized.Rd @@ -20,8 +20,8 @@ nplug_mzroll_normalized \seealso{ Other nplug: \code{\link{nplug_compounds}}, -\code{\link{nplug_mzroll_augmented}}, \code{\link{nplug_mzroll}()}, +\code{\link{nplug_mzroll_augmented}}, \code{\link{nplug_samples}} } \concept{nplug} diff --git a/man/nplug_samples.Rd b/man/nplug_samples.Rd index 620afc6..494ec45 100644 --- a/man/nplug_samples.Rd +++ b/man/nplug_samples.Rd @@ -39,9 +39,9 @@ A table of meta-data for all NPLUG samples with variables: \seealso{ Other nplug: \code{\link{nplug_compounds}}, +\code{\link{nplug_mzroll}()}, \code{\link{nplug_mzroll_augmented}}, -\code{\link{nplug_mzroll_normalized}}, -\code{\link{nplug_mzroll}()} +\code{\link{nplug_mzroll_normalized}} } \concept{nplug} \keyword{datasets}