From 68ec16c1644aac8bb771268679f5e5a23bc6231d Mon Sep 17 00:00:00 2001 From: Yichen Wang Date: Fri, 26 Apr 2024 12:39:37 -0400 Subject: [PATCH] Added new visualization functions --- DESCRIPTION | 4 +- NAMESPACE | 2 + NEWS.md | 5 ++ R/visualization.R | 145 +++++++++++++++++++++++++++++++++++++++ man/plotBarcodeRank.Rd | 44 ++++++++++++ man/plotProportionBox.Rd | 98 ++++++++++++++++++++++++++ 6 files changed, 296 insertions(+), 2 deletions(-) create mode 100644 man/plotBarcodeRank.Rd create mode 100644 man/plotProportionBox.Rd diff --git a/DESCRIPTION b/DESCRIPTION index aafe286..7e613df 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: rliger -Version: 2.0.1 -Date: 2024-04-04 +Version: 2.0.2 +Date: 2024-04-26 Type: Package Title: Linked Inference of Genomic Experimental Relationships Description: Uses an extension of nonnegative matrix factorization to identify shared and dataset-specific factors. See Welch J, Kozareva V, et al (2019) , and Liu J, Gao C, Sodicoff J, et al (2020) for more details. diff --git a/NAMESPACE b/NAMESPACE index 870d8c1..e2f5156 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -134,6 +134,7 @@ export(optimizeNewData) export(optimizeNewK) export(optimizeNewLambda) export(optimizeSubset) +export(plotBarcodeRank) export(plotByDatasetAndCluster) export(plotCellViolin) export(plotClusterDimRed) @@ -157,6 +158,7 @@ export(plotMarkerHeatmap) export(plotPeakDimRed) export(plotProportion) export(plotProportionBar) +export(plotProportionBox) export(plotProportionDot) export(plotProportionPie) export(plotSankey) diff --git a/NEWS.md b/NEWS.md index ea880d2..00fa4a8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -13,6 +13,11 @@ - Pseudo-bulk should be easy because we are just aggregating cells. - Wilcoxon might be a bit harder because ranks are calculated per gene but the H5 sparse data is column majored. Might need to find a fast on-disk transposition method. 
+## rliger 2.0.2
+
+- Added `plotProportionBox()` for visualizing compositional analysis
+- Added `plotBarcodeRank()` for basic QC visualization
+
 ## rliger 2.0.1
 
 - Fixed wrong UINMF aborting criteria
diff --git a/R/visualization.R b/R/visualization.R
index 49233aa..a6a48ea 100644
--- a/R/visualization.R
+++ b/R/visualization.R
@@ -332,6 +332,38 @@ plotGeneDetectedViolin <- function(
                    ylab = "Number of Genes Detected", ...)
 }
 
+#' Create barcode-rank plot for each dataset
+#' @description
+#' This function ranks the total count of each cell within each dataset and
+#' makes a line plot. It is simply for examining the input raw count data and
+#' does not infer any recommended cutoff for removing non-cell barcodes.
+#' @param object A \linkS4class{liger} object.
+#' @inheritDotParams .ggScatter dotSize dotAlpha raster
+#' @inheritDotParams .ggplotLigerTheme title subtitle xlab ylab baseSize titleSize subtitleSize xTextSize xTitleSize yTextSize yTitleSize panelBorder plotly
+#' @export
+#' @return A list object of ggplot for each dataset
+#' @examples
+#' plotBarcodeRank(pbmc)
+plotBarcodeRank <- function(
+        object,
+        ...
+) {
+    pl <- list()
+    for (d in names(object)) {
+        # Sort first and derive the rank from the sorted vector itself, so the
+        # two columns always have the same length (sort() drops NA values).
+        nUMI <- sort(
+            cellMeta(object, columns = "nUMI", useDatasets = d),
+            decreasing = TRUE
+        )
+        df <- data.frame(rank = seq_along(nUMI), nUMI = nUMI)
+        pl[[d]] <- .ggScatter(df, x = "rank", y = "nUMI", ...) +
+            ggplot2::scale_y_log10() +
+            ggplot2::scale_x_log10() +
+            ggplot2::geom_line()
+    }
+    pl
+}
 #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 # Proportion #####
 #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -566,6 +598,107 @@ plotProportionPie <- function(
     )
 }
 
+#' Box plot of cluster proportion in each dataset, grouped by condition
+#' @description
+#' This function calculates the proportion of each category (e.g. cluster, cell
+#' type) within each dataset, then makes a box plot grouped by condition. The
+#' proportion of all categories within one dataset sums up to 1. The condition
+#' variable must be a variable of dataset, i.e. each dataset must belong to only
+#' one condition.
+#'
+#' @param object A \linkS4class{liger} object.
+#' @param useCluster Name of variable in \code{cellMeta(object)}. Default
+#' \code{NULL} uses default cluster.
+#' @param conditionBy Name of the variable in \code{cellMeta(object)} that
+#' represents the condition. Must be a high level variable of the datasets, i.e.
+#' each dataset must belong to only one condition. Default \code{NULL} does not
+#' group by condition.
+#' @param dot Logical, whether to add dot plot on top of the box plot. Default
+#' \code{FALSE}.
+#' @param dotSize Size of the dot. Default uses user option "ligerDotSize", or
+#' \code{1} if not set.
+#' @param dotJitter Logical, whether to jitter the dot to avoid overlapping
+#' within a box when many dots are presented. Default \code{FALSE}.
+#' @inheritDotParams .ggplotLigerTheme title subtitle xlab ylab legendColorTitle legendFillTitle legendShapeTitle legendSizeTitle showLegend legendPosition baseSize titleSize subtitleSize xTextSize xTitleSize yTextSize yTitleSize legendTextSize legendTitleSize panelBorder legendNRow legendNCol colorLabels colorValues colorPalette colorDirection naColor colorLow colorMid colorHigh colorMidPoint plotly
+#' @export
+#' @return A ggplot object
+#' @examples
+#' plotProportionBox(pbmcPlot)
+plotProportionBox <- function(
+        object,
+        useCluster = NULL,
+        conditionBy = NULL,
+        dot = FALSE,
+        dotSize = getOption("ligerDotSize", 1),
+        dotJitter = FALSE,
+        ...
+) {
+    useCluster <- useCluster %||% object@uns$defaultCluster
+    if (is.null(useCluster)) {
+        cli::cli_abort("No cluster specified nor default set.")
+    }
+    clusterVar <- .fetchCellMetaVar(object, useCluster, checkCategorical = TRUE)
+    datasetVar <- object$dataset
+    compositionTable <- table(datasetVar, clusterVar)
+    dfLong <- data.frame(compositionTable)
+    names(dfLong) <- c("dataset", useCluster, "Count")
+
+    if (!is.null(conditionBy)) {
+        conditionVar <- .fetchCellMetaVar(
+            object = object, variables = conditionBy, checkCategorical = TRUE
+        )
+        conditionTable <- table(datasetVar, conditionVar)
+        # Check that condition variable is strictly a high level variable of dataset
+        if (!all(rowSums(conditionTable > 0) == 1)) {
+            cli::cli_abort("Condition variable must be a high level variable of the datasets, i.e. each dataset must belong to only one condition.")
+        }
+
+        conditionMap <- apply(
+            conditionTable,
+            MARGIN = 1,
+            function(row) colnames(conditionTable)[row > 0]
+        )
+        # Look up by dataset name: `dfLong$dataset` is a factor, and indexing
+        # with a factor would use its integer codes instead of its labels.
+        dfLong[[conditionBy]] <- factor(
+            conditionMap[as.character(dfLong$dataset)],
+            levels = levels(conditionVar)
+        )
+    }
+
+    p <- dfLong %>%
+        dplyr::group_by(dataset) %>%
+        dplyr::mutate(
+            Proportion = .data[["Count"]] / sum(.data[["Count"]])
+        ) %>%
+        ggplot2::ggplot(
+            mapping = (
+                if (!is.null(conditionBy))
+                    ggplot2::aes(
+                        x = .data[[useCluster]],
+                        y = .data[["Proportion"]],
+                        fill = .data[[conditionBy]]
+                    )
+                else ggplot2::aes(
+                    x = .data[[useCluster]],
+                    y = .data[["Proportion"]]
+                )
+            )
+        ) +
+        ggplot2::geom_boxplot()
+    if (isTRUE(dot)) {
+        # Added after the box layer so that the dots are drawn on top
+        p <- p + ggplot2::geom_point(
+            size = dotSize,
+            color = "black",
+            position =
+                if (isTRUE(dotJitter)) ggplot2::position_jitter()
+                else "identity"
+        )
+    }
+    .ggplotLigerTheme(p, ...)
+}
+
 #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 # Volcano plot ####
 #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
diff --git a/man/plotBarcodeRank.Rd b/man/plotBarcodeRank.Rd
new file mode 100644
index 0000000..7cb98e6
--- /dev/null
+++ b/man/plotBarcodeRank.Rd
@@ -0,0 +1,44 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/visualization.R
+\name{plotBarcodeRank}
+\alias{plotBarcodeRank}
+\title{Create barcode-rank plot for each dataset}
+\usage{
+plotBarcodeRank(object, ...)
+}
+\arguments{
+\item{object}{A \linkS4class{liger} object.}
+
+\item{...}{
+  Arguments passed on to \code{\link[=.ggScatter]{.ggScatter}}, \code{\link[=.ggplotLigerTheme]{.ggplotLigerTheme}}
+  \describe{
+    \item{\code{dotSize,dotAlpha}}{Numeric, controls the size or transparency of all
+dots. Default \code{getOption("ligerDotSize")} (1) and \code{0.9}.}
+    \item{\code{raster}}{Logical, whether to rasterize the plot.
Default \code{NULL} +automatically rasterize the plot when number of total cells to be plotted +exceeds 100,000.} + \item{\code{title,subtitle,xlab,ylab}}{Main title, subtitle or X/Y axis title text. +By default, no main title or subtitle will be set, and X/Y axis title will be +the names of variables used for plotting. Use \code{NULL} to hide elements. +\code{TRUE} for \code{xlab} or \code{ylab} shows default values.} + \item{\code{baseSize}}{One-parameter control of all text sizes. Individual text +element sizes can be controlled by other size arguments. "Title" sizes are +2 points larger than "text" sizes when being controlled by this.} + \item{\code{panelBorder}}{Whether to show rectangle border of the panel instead of +using ggplot classic bottom and left axis lines. Default \code{FALSE}.} + \item{\code{plotly}}{Whether to use plotly to enable web based interactive browsing +for the plot. Requires installation of package "plotly". Default +\code{FALSE}.} + }} +} +\value{ +A list object of ggplot for each dataset +} +\description{ +This function ranks the total count of each cell within each dataset and +makes a line plot. It is simply for examining the input raw count data and +does not infer any recommended cutoff for removing non-cell barcodes. +} +\examples{ +plotBarcodeRank(pbmc) +} diff --git a/man/plotProportionBox.Rd b/man/plotProportionBox.Rd new file mode 100644 index 0000000..9937548 --- /dev/null +++ b/man/plotProportionBox.Rd @@ -0,0 +1,98 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/visualization.R +\name{plotProportionBox} +\alias{plotProportionBox} +\title{Box plot of cluster proportion in each dataset, grouped by condition} +\usage{ +plotProportionBox( + object, + useCluster = NULL, + conditionBy = NULL, + dot = FALSE, + dotSize = getOption("ligerDotSize", 1), + dotJitter = FALSE, + ... 
+) +} +\arguments{ +\item{object}{A \linkS4class{liger} object.} + +\item{useCluster}{Name of variable in \code{cellMeta(object)}. Default +\code{NULL} uses default cluster.} + +\item{conditionBy}{Name of the variable in \code{cellMeta(object)} that +represents the condition. Must be a high level variable of the datasets, i.e. +each dataset must belong to only one condition. Default \code{NULL} does not +group by condition.} + +\item{dot}{Logical, whether to add dot plot on top of the box plot. Default +\code{FALSE}.} + +\item{dotSize}{Size of the dot. Default uses user option "ligerDotSize", or +\code{1} if not set.} + +\item{dotJitter}{Logical, whether to jitter the dot to avoid overlapping +within a box when many dots are presented. Default \code{FALSE}.} + +\item{...}{ + Arguments passed on to \code{\link[=.ggplotLigerTheme]{.ggplotLigerTheme}} + \describe{ + \item{\code{title,subtitle,xlab,ylab}}{Main title, subtitle or X/Y axis title text. +By default, no main title or subtitle will be set, and X/Y axis title will be +the names of variables used for plotting. Use \code{NULL} to hide elements. +\code{TRUE} for \code{xlab} or \code{ylab} shows default values.} + \item{\code{legendColorTitle,legendFillTitle,legendShapeTitle,legendSizeTitle}}{Set +alternative title text for legend on aes of color, fill, shape and size, +respectively. Default \code{NULL} shows the original variable name.} + \item{\code{showLegend}}{Whether to show the legend. Default \code{TRUE}.} + \item{\code{legendPosition}}{Text indicating where to place the legend. Choose from +\code{"top"}, \code{"bottom"}, \code{"left"} or \code{"right"}. Default +\code{"right"}.} + \item{\code{baseSize}}{One-parameter control of all text sizes. Individual text +element sizes can be controlled by other size arguments. 
"Title" sizes are +2 points larger than "text" sizes when being controlled by this.} + \item{\code{titleSize,xTitleSize,yTitleSize,legendTitleSize}}{Size of main title, +axis titles and legend title. Default \code{NULL} controls by +\code{baseSize + 2}.} + \item{\code{subtitleSize,xTextSize,yTextSize,legendTextSize}}{Size of subtitle text, +axis texts and legend text. Default \code{NULL} controls by \code{baseSize}.} + \item{\code{panelBorder}}{Whether to show rectangle border of the panel instead of +using ggplot classic bottom and left axis lines. Default \code{FALSE}.} + \item{\code{colorLabels,colorValues}}{Each a vector with as many values as the +number of categories for the categorical coloring aesthetics. Labels will be +the shown text and values will be the color code. These are passed to +\code{\link[ggplot2]{scale_color_manual}}. Default uses an internal selected +palette if there are <= 26 colors needed, or ggplot hues otherwise, and plot +original labels (levels of the factor).} + \item{\code{legendNRow,legendNCol}}{Integer, when too many categories in one +variable, arranges number of rows or columns. Default \code{NULL}, +automatically split to \code{ceiling(levels(variable)/10)} columns.} + \item{\code{colorPalette}}{For continuous coloring, an index or a palette name to +select from available options from ggplot +\code{\link[ggplot2]{scale_brewer}} or \code{\link[viridisLite]{viridis}}. +Default \code{"magma"}.} + \item{\code{colorDirection}}{Choose \code{1} or \code{-1}. Applied when +\code{colorPalette} is from Viridis options. Default \code{-1} use darker +color for higher value, while \code{1} reverses this direction.} + \item{\code{colorLow,colorMid,colorHigh,colorMidPoint}}{All four of these must be +specified to customize palette with +\code{\link[ggplot2]{scale_colour_gradient2}}. Default \code{NULL}.} + \item{\code{naColor}}{The color code for \code{NA} values. Default \code{"#DEDEDE"}.} 
+ \item{\code{plotly}}{Whether to use plotly to enable web based interactive browsing +for the plot. Requires installation of package "plotly". Default +\code{FALSE}.} + }} +} +\value{ +A ggplot object +} +\description{ +This function calculates the proportion of each category (e.g. cluster, cell +type) within each dataset, then makes a box plot grouped by condition. The +proportion of all categories within one dataset sums up to 1. The condition +variable must be a variable of dataset, i.e. each dataset must belong to only +one condition. +} +\examples{ +plotProportionBox(pbmcPlot) +}