From 74fe2ad0bba55119b85169f38e2acea5a7cb4c40 Mon Sep 17 00:00:00 2001 From: maggiec Date: Thu, 8 Dec 2022 23:24:04 -0500 Subject: [PATCH] add functions for heatmap, dotplot, 3d-tsne and dual-labeling --- R/3D_tSNE.R | 68 ++++ R/Dotplot_by_Metadata.R | 65 ++++ R/Dual_Labeling.R | 331 ++++++++++++++++++ R/Heatmap.R | 394 ++++++++++++++++++++++ man/DotplotMet.Rd | 37 ++ man/DualLabeling.Rd | 94 ++++++ man/Heatmap.Rd | 70 ++++ man/tSNE3D.Rd | 31 ++ tests/testthat/test-3D_tSNE.R | 14 + tests/testthat/test-Dotplot_by_Metadata.R | 25 ++ tests/testthat/test-Dual_Labeling.R | 30 ++ tests/testthat/test-Heatmap.R | 22 ++ 12 files changed, 1181 insertions(+) create mode 100644 R/3D_tSNE.R create mode 100644 R/Dotplot_by_Metadata.R create mode 100755 R/Dual_Labeling.R create mode 100644 R/Heatmap.R create mode 100644 man/DotplotMet.Rd create mode 100755 man/DualLabeling.Rd create mode 100644 man/Heatmap.Rd create mode 100644 man/tSNE3D.Rd create mode 100644 tests/testthat/test-3D_tSNE.R create mode 100644 tests/testthat/test-Dotplot_by_Metadata.R create mode 100755 tests/testthat/test-Dual_Labeling.R create mode 100644 tests/testthat/test-Heatmap.R diff --git a/R/3D_tSNE.R b/R/3D_tSNE.R new file mode 100644 index 0000000..95c1900 --- /dev/null +++ b/R/3D_tSNE.R @@ -0,0 +1,68 @@ +# This code comes from NIDAP 'Plot 3D TSNE from Seurat Object [scRNA-Seq][CCBR]' code template + +#' @title Plot 3D TSNE from Seurat Object and saves as html file +#' @description Returns 3D tSNE plot for seurat object +#' @details This method provides visualization of 3D tSNE plot +#' +#' @param object Seurat-class object +#' @param color.variable Metadata column to use for color +#' @param label.variable Metadata column to use for label +#' @param fileName Filename for saving plot (default is "plot.html") +#' @param npcs Number of PC's (default is 15) +#' @param save.plot Save plot (default is FALSE) +#' +#' @import Seurat +#' @import ggplot2 +#' @importFrom plotly as_widget plot_ly +#' +#' @export + +tSNE3D <- function(object, + color.variable, + label.variable, + fileName = "plot.html", + save.plot = FALSE, + npcs = 15){ + + color.variable <- sub("orig_ident","orig.ident",color.variable) + label.variable <- sub("orig_ident","orig.ident",label.variable) + cols=c("blue","green","red","orange","purple4","darkcyan","magenta2", + "darkred","darkorange") + + #Run TSNE again to get 3d coordinates: + object <- RunTSNE(object, assay="SCT", + dims=1:npcs, + dim.embed = 3, + seed.use=1) + + tsne.coord <- as.data.frame(object@reductions$tsne@cell.embeddings) + + tsne.df <- data.frame(TSNE1=tsne.coord$tSNE_1, + TSNE2=tsne.coord$tSNE_2, + TSNE3=tsne.coord$tSNE_3, + colorvar=as.factor(object@meta.data[[color.variable]]), + label = object@meta.data[[label.variable]]) + + fig <- plot_ly(tsne.df, x = ~TSNE1, + y = ~TSNE2, + z = ~TSNE3, + color = ~colorvar, + colors = cols, + type="scatter3d", + mode="markers", + hoverinfo = 'text', + text = ~label, + size= 4) + + legend=TRUE + if(legend==FALSE){ + fig <- hide_legend(fig) + } + + if(save.plot == TRUE){ + htmlwidgets::saveWidget(as_widget(fig), fileName, selfcontained=TRUE) + } + + tsne.results <- list("plot" = fig, "data" = tsne.df) + return(tsne.results) +} diff --git a/R/Dotplot_by_Metadata.R b/R/Dotplot_by_Metadata.R new file mode 100644 index 0000000..a9b5417 --- /dev/null +++ b/R/Dotplot_by_Metadata.R @@ -0,0 +1,65 @@ +# This code comes from NIDAP 'Dotplot of Gene Expression by Metadata [scRNA-Seq][CCBR] (79573b27-8a93-4f22-9863-993be1a44fc1): v16' code template + +#' @title Dotplot of Gene Expression by Metadata +#' @description This Dotplot plotter plots average gene expression values for a set of genes from an input table. Input table contains a single column for "Genes" and a single column for "Cell Type". The values in the "Cell Type" column should match the values provided in the metadata template (Metadata Category to Plot). The plot will order the genes (x-axis, left to right) and Cell Types (y-axis, top to bottom) in the order in which it appears in the input table. Based on the additional column selected (Sample Column where sample or any other metadata category), it will display a contingency table for the Metadata +#' @details This method provides a visualization plot showing the frequency of positively +#' +#' @param input.dataset +#' @param metadata.category.to.plot +#' @param sample Column that contains sample id +#' @param cell.type Column that contains the cell type information +#' @param genes.column Column that contains gene names +#' @param dot.color Dot color (default is "dark red") + +#' @import tidyverse +#' @import Seurat +#' @import cowplot + +#' @export +#' +#' @return Dotplot with markers and cell types. + +DotplotMet <- function(object, + metadata, + sample.column, + cell.type, + markers, + dot.color = "darkred") { + + #Make the metadata match: + metadata.df <- object@meta.data + #colnames(metadata.df) <- gsub("\\.","_",colnames(metadata.df)) + Idents(object) <- metadata.df[[metadata]] + + #Bring in input genes and custom names + markers <- gsub("[[:space:]]", "", markers) + markers <- markers[!duplicated(markers)] + celltype <- celltype[celltype != ""] + dp <- DotPlot(object, assay="SCT", features=markers, + dot.scale=4, + cols = c("lightgrey", dot.color)) + dp + dp$data$id <- factor(dp$data$id, levels=rev(celltype)) + dp$data$features.plot <- factor(dp$data$features.plot, levels=markers) + + plot <- ggplot(data = dp$data, mapping = aes_string(x = "features.plot", y = "id")) + + geom_point(mapping = aes_string(size = "pct.exp", color = "avg.exp.scaled")) + + #scale_color_gradient2(midpoint=0, low="blue", mid="white", + # high="red", space ="Lab" ) + + theme_cowplot() + + theme(axis.title.x = element_blank(), axis.text.x = element_text(angle = 90)) + + labs(y=metadata) + + # Generate Contingency Table for Annotated cell types + sample_column = sub("_",".",sample_column) + cluster_num <- as.data.frame.matrix(table(object@meta.data[[sample_column]],object@meta.data[[metadata]])) + cluster_num %>% rownames_to_column("Samples") -> cluster_num + + result.list <- list("plot" = plot, "data" = cluster_num) + + return(result.list) +} + + + + diff --git a/R/Dual_Labeling.R b/R/Dual_Labeling.R new file mode 100755 index 0000000..b27c91a --- /dev/null +++ b/R/Dual_Labeling.R @@ -0,0 +1,331 @@ +# This code comes from NIDAP 'Dual Labeling [CCBR] [scRNA-seq]' code template + +#' @title Plot coexpression of 2 markers using transcript and/or protein expression values +#' @description Returns individual and combined expression of 2 genes and allows for filtering (optional) of the Seurat object using expression thresholds +#' @details This method provides visualization of coexpression of 2 genes (or proteins) and additional methods for filtering for cells with gene expression values that are above or below thresholds set for one or both markers. +#' +#' @param object Seurat-class object +#' @param samples Samples to be included in the analysis +#' @param marker1 First gene/marker for coexpression analysis +#' @param marker_1_type Slot to use for first marker (choices are "SCT","protein","HTO") +#' @param marker2 Second gene/marker for coexpression analysis +#' @param marker_2_type Slot to use for second marker (choices are "SCT","protein","HTO") +#' @param data_reduction Dimension Reduction method to use for image (options are "umap" or "tsne") +#' @param point_size Point size for image (default is 0.5) +#' @param point_shape Point shape for image (default is 16) +#' @param point_transparency Point transparency for image (default is 0.5) +#' @param add_marker_thresholds Add marker thresholds (default is FALSE) +#' @param marker_1_threshold Threshold set for first marker (default is 0.5) +#' @param marker_2_threshold Threshold set for first marker (default is 0.5) +#' @param filter_data Add new parameter to metadata using marker thresholds (default is FALSE) +#' @param M1_filter_direction Filter to samples that have greater or less than marker 1 threshold (default is "greater than") +#' @param M2_filter_direction Filter to samples that have greater or less than marker 2 threshold (default is "greater than") +#' @param apply_filter_1 If TRUE, apply the first filter (default is TRUE) +#' @param apply_filter_2 If TRUE, apply the second filter (default is TRUE) +#' @param filter_condition If TRUE, apply both filters 1 and 2 and take intersection. If FALSE, apply filters and take the union. +#' @param parameter_name Name for metadata column for new marker filters. (Default is "Marker") +#' @param trim_marker_1 Trim top and bottom percentile of marker 1 signal to pre-scale trim values (below) to remove extremely low and high values (Default is TRUE) +#' @param trim_marker_2 Trim top and bottom percentile of marker 2 signal to pre-scale trim values (below) to remove extremely low and high values (Default is TRUE) +#' @param pre_scale_trim Set trimming percentile values (Defalut is 0.99) +#' @param density_heatmap Creates a additional heatmap showing the density distribution of cells. (Default is FALSE) +#' @param display_unscaled_values Set to TRUE if you want to view the unscaled gene/protein expression values (Default is FALSE) + +#' @import Seurat +#' @import ggplot2 +#' @importFrom scales rescale +#' @importFrom gridExtra arrangeGrob +#' @importFrom grid grid.draw +#' @importFrom dplyr arrange mutate case_when +#' @importFrom stats quantile +#' +#' @export +#' +#' @return a seurat object with optional additional metadata for cells that are positive or negative for gene markers and a coexpression plot. + +DualLabeling <- function(object, + samples, + marker1, + marker_1_type, + marker2, + marker_2_type, + data_reduction = "umap", + point_size = 0.5, + point_shape = 16, + point_transparency = 0.5, + add_marker_thresholds = FALSE, + marker_1_threshold = 0.5, + marker_2_threshold = 0.5, + filter_data = FALSE, + M1_filter_direction = "greater than", + M2_filter_direction = "greater than", + apply_filter_1 = TRUE, + apply_filter_2 = TRUE, + filter_condition = TRUE, + parameter_name = "Marker", + trim_marker_1 = TRUE, + trim_marker_2 = TRUE, + pre_scale_trim = 0.99, + density_heatmap = FALSE, + display_unscaled_values = FALSE){ + + ##--------------- ## + ## Error Messages ## + ## -------------- ## + + if(!(marker1 %in% rownames(obj))){ + stop(paste0("ERROR: ",marker1," is not found in dataset")) + } + if(!(marker2 %in% rownames(obj))){ + stop(paste0("ERROR: ",marker2," is not found in dataset")) + } + if(!(marker_1_type %in% names(obj@assays))){ + stop(paste0("ERROR: ",marker_1_type," slot is not found in dataset")) + } + if(!(marker_2_type %in% names(obj@assays))){ + stop(paste0("ERROR: ",marker_2_type," slot is not found in dataset")) + } + + +## --------------- ## +## Functions ## +## --------------- ## + +#Function for drawing overlay images for umap/tsne: +gg.overlay <- function(so.sub,df,marker1,marker2){ + df %>% arrange(mark1.scale) -> df + xmin = min(df$dr1) - 0.1*min(df$dr1) + xmax = max(df$dr1) + 0.1*min(df$dr1) + + gg.z1 <- ggplot(df, aes(dr1,dr2))+ + geom_point(color=rgb(red=df$mark1.scale,green=0,blue=0),shape=point_shape,size=point_size, alpha=point_transparency)+ + theme_classic() + + xlab(paste0(data_reduction,"-1")) + + ylab(paste0(data_reduction,"-2")) + + ggtitle(marker1) + + coord_fixed() + + df %>% arrange(mark2.scale) -> df + + gg.z2 <- ggplot(df, aes(dr1,dr2))+ + geom_point(color=rgb(red=0,green=df$mark2.scale,blue=0),shape=point_shape,size=point_size, alpha=point_transparency)+ + theme_classic() + + xlab(paste0(data_reduction,"-1")) + + ylab(paste0(data_reduction,"-2")) + + ggtitle(marker2) + + coord_fixed() + + df %>% mutate(avg = mark2.scale+mark1.scale) %>% arrange(avg) -> df + + gg <- ggplot(df, aes(dr1,dr2))+ + geom_point(color=rgb(red=df$mark1.scale,green=df$mark2.scale,blue=0),shape=point_shape,size=point_size, alpha=point_transparency)+ + theme_classic() + + xlab(paste0(data_reduction,"-1")) + + ylab(paste0(data_reduction,"-2")) + + ggtitle("Combined") + + coord_fixed() + + return(list(gg.z1,gg.z2,gg)) +} + +#Function for plotting expression data in xy overlay format: +gg.overlay2 <- function(so.sub,df,marker1,marker2){ + df %>% arrange(mark1.scale) -> df + # Create unscaled axis labels + + if(display_unscaled_values == TRUE){ + label1_min <- paste("unscaled min:", round(min(mark1),digits = 2)) + label1_max <- paste("unscaled max:", round(max(mark1),digits = 2)) + label1 <- paste(as.character(marker1), label1_min, label1_max, sep = "\n") + + label2_min <- paste("unscaled min:", round(min(mark2),digits = 2)) + label2_max <- paste("unscaled max:", round(max(mark2),digits = 2)) + label2 <- paste(as.character(marker2), label2_min, label2_max, sep = "\n")} else { + + label1 <- as.character(marker1) + label2 <- as.character(marker2) + } + + gg.z1 <- ggplot(df, aes(mark1.scale,mark2.scale))+ + geom_point(color=rgb(red=df$mark1.scale,green=0,blue=0),shape = 20,size=point_size)+ + theme_classic() + + xlab(label1) + + ylab(label2) + + coord_fixed() + + df %>% arrange(mark2.scale) -> df + + gg.z2 <- ggplot(df, aes(mark1.scale,mark2.scale))+ + geom_point(color=rgb(red=0,green=df$mark2.scale,blue=0),shape = 20,size=point_size)+ + theme_classic() + + xlab(label1) + + ylab(label2) + + coord_fixed() + + df %>% mutate(avg = mark2.scale+mark1.scale) %>% arrange(avg) -> df + + gg <- ggplot(df, aes(mark1.scale,mark2.scale))+ + geom_point(color=rgb(red=df$mark1.scale,green=df$mark2.scale,blue=0),shape = 20,size=point_size)+ + theme_classic() + + xlab(label1) + + ylab(label2) + + coord_fixed() + + if(add_marker_thresholds==TRUE){ + gg.z1 <- gg.z1 + + geom_vline(xintercept=t1,linetype="dashed") + + geom_hline(yintercept=t2,linetype="dashed") + gg.z2 <- gg.z2 + + geom_vline(xintercept=t1,linetype="dashed") + + geom_hline(yintercept=t2,linetype="dashed") + gg <- gg + + geom_vline(xintercept=t1,linetype="dashed") + + geom_hline(yintercept=t2,linetype="dashed") + } + + return(list(gg.z1,gg.z2,gg)) +} + +## --------------- ## +## Main Code Block ## +## --------------- ## + +# Load and subset data using sample names + +if("active.ident" %in% slotNames(object)){ + sample_name = as.factor(object@meta.data$orig.ident) + names(sample_name)=names(object@active.ident) + object@active.ident <- as.factor(vector()) + object@active.ident <- sample_name + so.sub = subset(object, ident = samples) +} else { + sample_name = as.factor(object@meta.data$orig.ident) + names(sample_name)=names(object@active.ident) + object@active.ident <- as.factor(vector()) + object@active.ident <- sample_name + so.sub = subset(object, ident = samples) +} + +t1 = marker_1_threshold +t2 = marker_2_threshold + +#Select marker 1 values and scale: +mark1 = so.sub@assays[[marker_1_type]]@scale.data[marker1,] +if(trim_marker_1 == TRUE){ + q1 = quantile(mark1,pre_scale_trim) + q0 = quantile(mark1,1-pre_scale_trim) + mark1[mark1q1]=q1 +} +mark1.scale <- rescale(mark1, to=c(0,1)) + +#Select marker 2 values and scale: +mark2 = so.sub@assays[[marker_2_type]]@scale.data[marker2,] +if(trim_marker_2 == TRUE){ + q1 = quantile(mark2,pre_scale_trim) + q0 = quantile(mark2,1-pre_scale_trim) + mark2[mark2q1]=q1 +} +mark2.scale <- rescale(mark2, to=c(0,1)) + +#Draw Plots: +df <- data.frame(cbind(dr1=so.sub@reductions[[data_reduction]]@cell.embeddings[,1], + dr2=so.sub@reductions[[data_reduction]]@cell.embeddings[,2], + mark1.scale,mark2.scale)) +gg.list <- gg.overlay(so.sub,df,marker1,marker2) +gg.list2 <- gg.overlay2(so.sub,df,marker1,marker2) + +n=3 +imageWidth = 1000*n +imageHeight = 1000*2 +dpi = 300 + +png( + width=imageWidth, + height=imageHeight, + units="px", + pointsize=4, + bg="white", + res=dpi, + type="cairo") + +if(density_heatmap == TRUE){ + x <- df$mark1.scale + y <- df$mark2.scale + + df_heatmap <- data.frame(x = x, y = y, + d = densCols(x, y, nbin = 500, colramp = colorRampPalette(rev(rainbow(10, end = 4/6))))) + + p <- ggplot(df_heatmap) + + geom_point(aes(x, y, col = d), size = 1) + + scale_color_identity() + xlab(marker1) + ylab(marker2) + + theme_bw() + geom_vline(xintercept=t1,linetype="dashed") + + geom_hline(yintercept=t2,linetype="dashed") + + + grob <- arrangeGrob(gg.list[[1]], gg.list[[2]], gg.list[[3]], gg.list2[[1]],gg.list2[[2]],gg.list2[[3]],p,ncol=3) +} else { + grob <- arrangeGrob(gg.list[[1]], gg.list[[2]], gg.list[[3]], gg.list2[[1]],gg.list2[[2]],gg.list2[[3]],ncol=3) +} + +#Applying Filters to Data using Thresholds: +if(filter_data == TRUE){ + df <- df %>% mutate(sample=so.sub@meta.data$sample) %>% + mutate(cellbarcode=rownames(so.sub@meta.data)) + + if(M1_filter_direction == "greater than"){ + ind1 <- df$`mark1.scale` > t1 + } else { + ind1 <- df$`mark1.scale` < t1 + } + + cat("\n") + print("Marker 1 filter:") + print(sum(ind1)) + + if(M2_filter_direction == "greater than"){ + ind2 <- df$`mark2.scale` > t2 + } else { + ind2 <- df$`mark2.scale` < t2 + } + + cat("\n") + print("Marker 2 filter:") + print(sum(ind2)) + + if(apply_filter_1){ + if(apply_filter_2){ + if(filter_condition == TRUE){ + df <- df[c(ind1&ind2),] + } else { + df <- df[c(ind1|ind2),] + } + } else { + df <- df[ind1,] + } + } else { + if(apply_filter_2){ + df <- df[ind2,] + } + } + + colnames(df)[3:4]= c(marker1,marker2) + so.sub@meta.data %>% + mutate(x = case_when(rownames(so.sub@meta.data) %in% df$cellbarcode ~ TRUE, TRUE ~ FALSE)) -> so.sub.df + colnames(so.sub@meta.data) <- sub("x",parameter_name,colnames(so.sub@meta.data)) + + cat("\n") + print("Final Breakdown:") + print(addmargins(table(so.sub.df[[parameter_name]],so.sub.df$sample_name))) + rownames(so.sub.df) <- rownames(so.sub@meta.data) + so.sub@meta.data <- so.sub.df + + cat("\n") + print("After Filter applied:") + print(dim(df)[1]) +} + +result.list <- list("so" = so.sub,"plot" = grob) + +return(result.list) +} \ No newline at end of file diff --git a/R/Heatmap.R b/R/Heatmap.R new file mode 100644 index 0000000..184434f --- /dev/null +++ b/R/Heatmap.R @@ -0,0 +1,394 @@ +# This code comes from NIDAP 'Heatmap for Single Cell Data [scRNA-Seq][CCBR]' code template + +#' @title Plot coexpression of 2 markers using transcript and/or protein expression values +#' @description Returns individual and combined expression of 2 genes and allows for filtering (optional) of the Seurat object using expression thresholds +#' @details This method provides visualization of coexpression of 2 genes (or proteins) and additional methods for filtering for cells with gene expression values that are above or below thresholds set for one or both markers. +#' +#' @param object Seurat-class object +#' @param sample.names Sample names +#' @param metadata Metadata column to plot +#' @param transcripts Transcripts to plot +#' @param proteins Proteins to plot (default is NULL) +#' @param plot.title Title of plot (default is "Heatmap") +#' @param add.gene.or.protein Add Gene or protein annotations (default is FALSE) +#' @param protein.annotations Protein annotations to add (defulat is NULL) +#' @param rna.annotations Gene annotations to add (default is NULL) +#' @param arrange.by.metadata Arrange by metadata (default is TRUE) +#' @param add.row.names Add row names (default is FALSE) +#' @param add.column.names Add column names (default is FALSE) +#' @param set.seed Seed for colors (default is 6) +#' @param scale.data Perform z-scaling on rows (default is TRUE) +#' @param trim.outliers Remove outlier data (default is TRUE) +#' @param trim.outliers.percentage Set outlier percentage (default is 0.01) +#' @param order.heatmap.rows Order heatmap rows (default is FALSE) +#' @param row.order Set row order to gene vector (default is NULL) +#' +#' @import Seurat +#' @import ggplot2 +#' @import pheatmap +#' @import dendsort +#' @import dendextend +#' +#' @export +#' +Heatmap <- function(object, + sample.names, + metadata, + transcripts, + proteins = NULL, + plot.title = "Heatmap", + add.gene.or.protein = TRUE, + protein.annotations = NULL, + rna.annotations = NULL, + arrange.by.metadata = TRUE, + add.row.names = TRUE, + add.column.names = FALSE, + set.seed = 6, + scale.data = TRUE, + trim.outliers = TRUE, + trim.outliers.percentage = 0.01, + order.heatmap.rows = FALSE, + row.order = c()) { + ## -------------------------------- ## + ## Functions ## + ## -------------------------------- ## + + n <- 2e3 + + set.seed(set.seed) + ourColorSpace <- colorspace::RGB(runif(n), runif(n), runif(n)) + ourColorSpace <- as(ourColorSpace, "LAB") + + + distinctColorPalette <-function(k=1,seed) { + currentColorSpace <- ourColorSpace@coords + # Set iter.max to 20 to avoid convergence warnings. + set.seed(seed) + km <- kmeans(currentColorSpace, k, iter.max=20) + colors <- unname(hex(LAB(km$centers))) + return(colors) + } + + pal = function (n, h=c(237, 43), c=100, l=c(70, 90), power=1, fixup=TRUE, gamma=NULL, alpha=1, ...) { + if (n < 1L) + return(character(0L)) + h <- rep(h, length.out = 2L) + c <- c[1L] + l <- rep(l, length.out = 2L) + power <- rep(power, length.out = 2L) + rval <- seq(1, -1, length = n) + rval <- hex( + polarLUV( + L = l[2L] - diff(l) * abs(rval)^power[2L], + C = c * abs(rval)^power[1L], + H = ifelse(rval > 0, h[1L], h[2L]) + ), + fixup=fixup, ... + ) + if (!missing(alpha)) { + alpha <- pmax(pmin(alpha, 1), 0) + alpha <- format(as.hexmode(round(alpha * 255 + 1e-04)), + width = 2L, upper.case = TRUE) + rval <- paste(rval, alpha, sep = "") + } + return(rval) + } + + #Color selections for heatmap: + np0 = pal(100) + np1 = diverge_hcl(100, c=100, l=c(30, 80), power=1) #Blue to Red + np2 = heat_hcl(100, c=c(80, 30), l=c(30, 90), power=c(1/5, 2)) #Red to Vanilla + np3 = rev(heat_hcl(100, h=c(0, -100), c=c(40, 80), l=c(75, 40), power=1)) #Violet to Pink + np4 = rev(colorRampPalette(brewer.pal(10, "RdYlBu"))(100)) + np5 = colorRampPalette(c("steelblue","white", "red"))(100) #Steelblue to White to Red + + np = list(np0, np1, np2, np3, np4, np5) + names(np) = c("Default","Blue to Red","Red to Vanilla","Violet to Pink","Bu Yl Rd","Bu Wt Rd") + + + doheatmap <- function(dat, clus, clus2, rn, cn, col) { + require(pheatmap) + require(dendsort) + if (scale.data == TRUE) { + tmean.scale = t(scale(t(dat))) + tmean.scale = tmean.scale[is.finite(rowSums(tmean.scale)),] + } else { + tmean.scale = dat + } + if(trim.outliers == TRUE){ + quantperc <- trim.outliers.percentage + upperquant <- 1-quantperc + for(i in 1:nrow(tmean.scale)){ + data <- tmean.scale[i,] + dat.quant <- quantile(data,probs=c(quantperc,upperquant)) + data[data > dat.quant[2]] <- dat.quant[2] + data[data < dat.quant[1]] <- dat.quant[1] + tmean.scale[i,] <- data + } + } + col.pal <- np[[col]] + drows1 <- "euclidean" + dcols1 <- "euclidean" + minx = min(tmean.scale) + maxx = max(tmean.scale) + treeheight <- 25 + breaks = seq(minx, maxx, length=100) + legbreaks = seq(minx, maxx, length=5) + breaks = sapply(breaks, signif, 4) + legbreaks = sapply(legbreaks, signif, 4) + + #Run cluster method: + hc = hclust(dist(t(tmean.scale)), method="complete") + hcrow = hclust(dist(tmean.scale), method="complete") + + if (clus) { + sort_hclust <- function(...) as.hclust(rev(dendsort(as.dendrogram(...)))) + } else { + sort_hclust <- function(...) as.hclust(dendsort(as.dendrogram(...))) + } + + if (clus2) { + rowclus <- sort_hclust(hcrow) + } else { + rowclus = FALSE + } + + pathname <- stringr::str_replace_all(plot.title, "_", " ") + pathname <- stringr::str_wrap(pathname,50) + + hm.parameters <- list( + tmean.scale, + color=col.pal, + legend_breaks=legbreaks, + scale="none", + treeheight_col=treeheight, + treeheight_row=treeheight, + kmeans_k=NA, + breaks=breaks, + fontsize_row=5, + fontsize_col=10, + show_rownames=rn, + show_colnames=cn, + main=pathname, + clustering_method="complete", + cluster_rows=rowclus, + cluster_cols=clus, + cutree_rows=1, + clustering_distance_rows=drows1, + clustering_distance_cols=dcols1, + annotation_col = annotation_col, + annotation_colors = annot_col, + labels_col = labels_col + ) + + mat = t(tmean.scale) + + callback = function(hc, mat) { + dend=rev(dendsort(as.dendrogram(hc))) + dend %>% dendextend::rotate(c(1:length(dend))) -> dend + as.hclust(dend) + } + do.call("pheatmap", c(hm.parameters, list(clustering_callback=callback))) + } + + + ## --------------- ## + ## Main Code Block ## + ## --------------- ## + + # load data + samples_to_include = sample.names + samples_to_include <- samples_to_include[samples_to_include != ""] + samples_to_include <- gsub("-","_",samples_to_include) + + #Clean up transcript names and print missing genes: + transcripts = gsub(" ","",transcripts) + if(transcripts[1] != ""){ + genesmiss = setdiff(transcripts,rownames(object$SCT@scale.data)) + if(length(genesmiss)>0){ + print(paste("missing genes:", genesmiss)) + } + } + transcripts = transcripts[transcripts %in% rownames(object$SCT@scale.data)] + + #Clean up protein names and print missing proteins: + if(!is.null(object@assays$Protein)){ + proteins = gsub(" ","",proteins) + if(proteins[1] != ""){ + protmiss = setdiff(proteins,rownames(object$Protein@scale.data)) + if(length(protmiss)>0){ + print(paste("missing proteins:", protmiss)) + } + } + proteins = proteins[proteins %in% rownames(object$Protein@scale.data)] + } + + #collect transcript expression data from SCT slot + df.mat1 = NULL + if(length(transcripts)>0){ + if(length(transcripts)==1){ + df.mat1 <- vector(mode="numeric",length=length(object$SCT@scale.data[transcripts,])) + df.mat1 <- object$SCT@scale.data[transcripts,] + } else { + df.mat1 <- as.matrix(object$SCT@scale.data[transcripts,]) + } + } + + #collect protein expression values + df.mat2 = NULL + if(!is.null(object@assays$Protein)){ + if(length(proteins)>0){ + if(length(proteins)==1){ + df.mat2 <- vector(mode="numeric",length=length(object$Protein@scale.data[proteins,])) + df.mat2 <- object$Protein@scale.data[proteins,] + protname <- paste0(proteins,"_Prot") + } else { + df.mat2 <- as.matrix(object$Protein@scale.data[proteins,]) + protname <- paste0(proteins,"_Prot") + rownames(df.mat2) <- protname + } + } + } + + #Put together transcript and protein data + df.mat <- rbind(df.mat1,df.mat2) + if(!is.null(df.mat1)){ + rownames(df.mat)[rownames(df.mat)=="df.mat1"] <- transcripts + } + + if(!is.null(df.mat2)){ + rownames(df.mat)[rownames(df.mat)=="df.mat2"] <- protname + } + + df.mat <- df.mat[sort(rownames(df.mat)),] + + #Set row order to specific genes/proteins + if(order.heatmap.rows == TRUE){ + row.order = row.order[row.order %in% rownames(df.mat)] + row.order = c(row.order,setdiff(rownames(df.mat),row.order)) + df.mat <- df.mat[row.order,] + clusrows <- FALSE + } else { + clusrows <- TRUE + } + + metadata <- sub("orig_ident","orig.ident",metadata) + #Set up annotation track: + object@meta.data %>% dplyr::filter(orig.ident %in% samples_to_include) -> metadata_table + + if(!"Barcode" %in% metadata){ + metadata = c(metadata,"Barcode") + } + + metadata_table %>% select(metadata) -> annot + a = dim(annot)[2] - 1 + + #Addition of gene and/or protein expression data to annotation tracks: + if(add.gene.or.protein == TRUE){ + if(length(protein.annotations) > 0){ + annot1 <- as.matrix(object$Protein@scale.data[protein_annotations,]) + if(length(protein.annotations)==1){ + annot1 <- annot1[match(annot$Barcode,rownames(annot1))] + protname <- paste0(protein.annotations,"_Prot") + } else { + annot1 <- annot1[,match(annot$Barcode,colnames(annot1))] + annot1 <- t(annot1) + colnames(annot1) = paste0(colnames(annot1),"_Prot") + } + } + + if(length(rna.annotations)>0){ + annot2 <- as.matrix(object$SCT@scale.data[rna.annotations,]) + if(length(rna.annotations)==1){ + annot2 <- annot2[match(annot$Barcode,rownames(annot2))] + } else { + annot2 <- annot2[,match(annot$Barcode,colnames(annot2))] + annot2 <- t(annot2) + } + } + } + + if(exists("annot1")){ + annot <- cbind(annot,annot1) + colnames(annot)[colnames(annot)=="annot1"] <- protname + } + if(exists("annot2")){ + annot <- cbind(annot,annot2) + colnames(annot)[colnames(annot)=="annot2"] <- rna.annotations + } + + #Arrange columns by metadata tracks: + if(arrange.by.metadata == TRUE){ + annot %>% arrange(across(all_of(metadata))) -> annot + df.mat <- df.mat[,match(annot$Barcode,colnames(df.mat))] + df.mat <- df.mat[ , apply(df.mat, 2, function(x) !any(is.na(x)))] + cluscol = FALSE + } else { + cluscol = TRUE + } + + #Set up annotation track and colors: + annotation_col = as.data.frame(unclass(annot[,!(names(annot) %in% "Barcode")])) + annotation_col %>% mutate_if(is.logical, as.factor) -> annotation_col + rownames(annotation_col) <- annot$Barcode + if(dim(annot)[2] == 2){ + annottitle = colnames(annot)[1] + colnames(annotation_col) = annottitle + } + annot_col = list() + groups=colnames(annotation_col) + + q=0 + for(i in 1:dim(annotation_col)[2]){ + q = q+length(unique(annotation_col[,i])) + } + + colors=distinctColorPalette(q,5) + + b=1 + i=1 + nam = NULL + col <- NULL + annot_col <- NULL + for (i in 1:length(groups)){ + nam <- groups[i] + if(class(annotation_col[,i]) != "numeric"){ + grp <- as.factor(annotation_col[,i]) + c <- b+length(levels(grp))-1 + col = colors[b:c] + names(col) <- levels(grp) + assign(nam,col) + annot_col = append(annot_col,mget(nam)) + b = c+1 + i=i+1 + } + else{ + grp <- annotation_col[,i] + np5 = colorRampPalette(c("steelblue","white", "red"))(length(grp)) + col=np5 + assign(nam,col) + annot_col = append(annot_col,mget(nam)) + } + } + + print(paste0("The total number of genes in heatmap: ", nrow(df.mat))) + labels_col <- colnames(df.mat) + + #Draw heatmap + p = doheatmap(dat=df.mat, clus=cluscol, clus2=clusrows, rn=add.row.names, cn=add.column.names, col="Bu Yl Rd") + + #Return expression matrix used in heatmap + as.data.frame(df.mat) %>% rownames_to_column("gene") -> heatmap.df + heatmap.res <- list("plot" = p, "data" = heatmap.df) + return(heatmap.res) +} + +################################################# +## Global imports and functions included below ## +################################################# + +# Functions defined here will be available to call in +# the code for any table. + + + \ No newline at end of file diff --git a/man/DotplotMet.Rd b/man/DotplotMet.Rd new file mode 100644 index 0000000..01e7875 --- /dev/null +++ b/man/DotplotMet.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Dotplot_by_Metadata.R +\name{DotplotMet} +\alias{DotplotMet} +\title{Dotplot of Gene Expression by Metadata} +\usage{ +DotplotMet( + object, + metadata, + sample.column, + cell.type, + markers, + dot.color = "darkred" +) +} +\arguments{ +\item{cell.type}{Column that contains the cell type information} + +\item{dot.color}{Dot color (default is "dark red")} + +\item{input.dataset}{} + +\item{metadata.category.to.plot}{} + +\item{sample}{Column that contains sample id} + +\item{genes.column}{Column that contains gene names} +} +\value{ +Dotplot with markers and cell types. +} +\description{ +This Dotplot plotter plots average gene expression values for a set of genes from an input table. Input table contains a single column for "Genes" and a single column for "Cell Type". The values in the "Cell Type" column should match the values provided in the metadata template (Metadata Category to Plot). The plot will order the genes (x-axis, left to right) and Cell Types (y-axis, top to bottom) in the order in which it appears in the input table. Based on the additional column selected (Sample Column where sample or any other metadata category), it will display a contingency table for the Metadata +} +\details{ +This method provides a visualization plot showing the frequency of positively +} diff --git a/man/DualLabeling.Rd b/man/DualLabeling.Rd new file mode 100755 index 0000000..740c848 --- /dev/null +++ b/man/DualLabeling.Rd @@ -0,0 +1,94 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Dual_Labeling.R +\name{DualLabeling} +\alias{DualLabeling} +\title{Plot coexpression of 2 markers using transcript and/or protein expression values} +\usage{ +DualLabeling( + object, + samples, + marker1, + marker_1_type, + marker2, + marker_2_type, + data_reduction = "umap", + point_size = 0.5, + point_shape = 16, + point_transparency = 0.5, + add_marker_thresholds = FALSE, + marker_1_threshold = 0.5, + marker_2_threshold = 0.5, + filter_data = FALSE, + M1_filter_direction = "greater than", + M2_filter_direction = "greater than", + apply_filter_1 = TRUE, + apply_filter_2 = TRUE, + filter_condition = TRUE, + parameter_name = "Marker", + trim_marker_1 = TRUE, + trim_marker_2 = TRUE, + pre_scale_trim = 0.99, + density_heatmap = FALSE, + display_unscaled_values = FALSE +) +} +\arguments{ +\item{object}{Seurat-class object} + +\item{samples}{Samples to be included in the analysis} + +\item{marker1}{First gene/marker for coexpression analysis} + +\item{marker_1_type}{Slot to use for first marker (choices are "SCT","protein","HTO")} + +\item{marker2}{Second gene/marker for coexpression analysis} + +\item{marker_2_type}{Slot to use for second marker (choices are "SCT","protein","HTO")} + +\item{data_reduction}{Dimension Reduction method to use for image (options are "umap" or "tsne")} + +\item{point_size}{Point size for image (default is 0.5)} + +\item{point_shape}{Point shape for image (default is 16)} + +\item{point_transparency}{Point transparency for image (default is 0.5)} + +\item{add_marker_thresholds}{Add marker thresholds (default is FALSE)} + +\item{marker_1_threshold}{Threshold set for first marker (default is 0.5)} + +\item{marker_2_threshold}{Threshold set for first marker (default is 0.5)} + +\item{filter_data}{Add new parameter to metadata using marker thresholds (default is FALSE)} + +\item{M1_filter_direction}{Filter to samples that have greater or less than marker 1 threshold (default is "greater than")} + +\item{M2_filter_direction}{Filter to samples that have greater or less than marker 2 threshold (default is "greater than")} + +\item{apply_filter_1}{If TRUE, apply the first filter (default is TRUE)} + +\item{apply_filter_2}{If TRUE, apply the second filter (default is TRUE)} + +\item{filter_condition}{If TRUE, apply both filters 1 and 2 and take intersection. If FALSE, apply filters and take the union.} + +\item{parameter_name}{Name for metadata column for new marker filters. (Default is "Marker")} + +\item{trim_marker_1}{Trim top and bottom percentile of marker 1 signal to pre-scale trim values (below) to remove extremely low and high values (Default is TRUE)} + +\item{trim_marker_2}{Trim top and bottom percentile of marker 2 signal to pre-scale trim values (below) to remove extremely low and high values (Default is TRUE)} + +\item{pre_scale_trim}{Set trimming percentile values (Defalut is 0.99)} + +\item{density_heatmap}{Creates a additional heatmap showing the density distribution of cells. (Default is FALSE)} + +\item{display_unscaled_values}{Set to TRUE if you want to view the unscaled gene/protein expression values (Default is FALSE)} +} +\value{ +a seurat object with optional additional metadata for cells that are positive or negative for gene markers and a coexpression plot. +} +\description{ +Returns individual and combined expression of 2 genes and allows for filtering (optional) of the Seurat object using expression thresholds +} +\details{ +This method provides visualization of coexpression of 2 genes (or proteins) and additional methods for filtering for cells with gene expression values that are above or below thresholds set for one or both markers. +} diff --git a/man/Heatmap.Rd b/man/Heatmap.Rd new file mode 100644 index 0000000..a276d24 --- /dev/null +++ b/man/Heatmap.Rd @@ -0,0 +1,70 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Heatmap.R +\name{Heatmap} +\alias{Heatmap} +\title{Plot coexpression of 2 markers using transcript and/or protein expression values} +\usage{ +Heatmap( + object, + sample.names, + metadata, + transcripts, + proteins = NULL, + plot.title = "Heatmap", + add.gene.or.protein = TRUE, + protein.annotations = NULL, + rna.annotations = NULL, + arrange.by.metadata = TRUE, + add.row.names = TRUE, + add.column.names = FALSE, + set.seed = 6, + scale.data = TRUE, + trim.outliers = TRUE, + trim.outliers.percentage = 0.01, + order.heatmap.rows = FALSE, + row.order = c() +) +} +\arguments{ +\item{object}{Seurat-class object} + +\item{sample.names}{Sample names} + +\item{metadata}{Metadata column to plot} + +\item{transcripts}{Transcripts to plot} + +\item{proteins}{Proteins to plot (default is NULL)} + +\item{plot.title}{Title of plot (default is "Heatmap")} + +\item{add.gene.or.protein}{Add Gene or protein annotations (default is FALSE)} + +\item{protein.annotations}{Protein annotations to add (defulat is NULL)} + +\item{rna.annotations}{Gene annotations to add (default is NULL)} + +\item{arrange.by.metadata}{Arrange by metadata (default is TRUE)} + +\item{add.row.names}{Add row names (default is FALSE)} + +\item{add.column.names}{Add column names (default is FALSE)} + +\item{set.seed}{Seed for colors (default is 6)} + +\item{scale.data}{Perform z-scaling on rows (default is TRUE)} + +\item{trim.outliers}{Remove outlier data (default is TRUE)} + +\item{trim.outliers.percentage}{Set outlier percentage (default is 0.01)} + +\item{order.heatmap.rows}{Order heatmap rows (default is FALSE)} + +\item{row.order}{Set row order to gene vector (default is NULL)} +} +\description{ +Returns individual and combined expression of 2 genes and allows for filtering (optional) of the Seurat object using expression thresholds +} +\details{ +This method provides visualization of coexpression of 2 genes (or proteins) and additional methods for filtering for cells with gene expression values that are above or below thresholds set for one or both markers. +} diff --git a/man/tSNE3D.Rd b/man/tSNE3D.Rd new file mode 100644 index 0000000..ecf128d --- /dev/null +++ b/man/tSNE3D.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/3D_tSNE.R +\name{tSNE3D} +\alias{tSNE3D} +\title{Plot 3D TSNE from Seurat Object and saves as html file} +\usage{ +tSNE3D( + object, + color.variable, + label.variable, + fileName = "plot.html", + npcs = 15 +) +} +\arguments{ +\item{object}{Seurat-class object} + +\item{color.variable}{Metadata column to use for color} + +\item{label.variable}{Metadata column to use for label} + +\item{fileName}{Filename for saving plot} + +\item{npcs}{Number of PC's} +} +\description{ +Returns 3D tSNE plot for seurat object +} +\details{ +This method provides visualization of 3D tSNE plot +} diff --git a/tests/testthat/test-3D_tSNE.R b/tests/testthat/test-3D_tSNE.R new file mode 100644 index 0000000..1406abf --- /dev/null +++ b/tests/testthat/test-3D_tSNE.R @@ -0,0 +1,14 @@ +test_that("Produce 3D tsne plot and return tsne coordinates", { + + seurat_object <- readRDS("/rstudio-files/ccbr-data/users/maggie/SCWorkflow/tests/testthat/fixtures/SO_moduleScore.rds") + + tsne.res <- tSNE3D(object, + color.variable = "orig_ident", + label.variable = "Likely_CellType", + fileName = "tsneplot.html", + save.plot = TRUE) + + expected.elements = c("plot","data") + expect_setequal(names(tsne.res), expected.elements) + +}) \ No newline at end of file diff --git a/tests/testthat/test-Dotplot_by_Metadata.R b/tests/testthat/test-Dotplot_by_Metadata.R new file mode 100644 index 0000000..3bfa0c6 --- /dev/null +++ b/tests/testthat/test-Dotplot_by_Metadata.R @@ -0,0 +1,25 @@ + +test_that("dotplot produced and contingency table returned", { + library(Seurat) + obj <- readRDS("/rstudio-files/ccbr-data/users/maggie/SCWorkflow/tests/testthat/fixtures/SO_moduleScore.rds") + celltype_table <- read.csv2("/rstudio-files/ccbr-data/users/maggie/SCWorkflow/tests/testthat/fixtures/cell_types_genes_table.tsv", header = TRUE, sep = "\t") + metadata_column <- "Likely_CellType" + sample_column <- "orig.ident" + genes_column <- "Gene_Names" + cell_type_column <- "Cluster_Names" + + markers <- celltype_table[[genes_column]][!is.na(celltype_table[[genes_column]])] + celltype <- celltype_table[[cell_type_column]][!is.na(celltype_table[[cell_type_column]])] + + results.list <- DotplotMet(object = obj, + metadata = metadata_column, + sample.column = sample_column, + cell.type = celltype, + markers = markers, + dot.color = "darkred") + + print(results.list) + expected.elements <- c("plot","data") + expect_setequal(names(results.list), expected.elements) + +}) diff --git a/tests/testthat/test-Dual_Labeling.R b/tests/testthat/test-Dual_Labeling.R new file mode 100755 index 0000000..825a451 --- /dev/null +++ b/tests/testthat/test-Dual_Labeling.R @@ -0,0 +1,30 @@ +test_that("Test Dual labeling", { + + obj <- readRDS("/rstudio-files/ccbr-data/users/maggie/SCWorkflow/tests/testthat/fixtures/SO_moduleScore.rds") + markertypes <- c("SCT","protein") + reductions <- c("tsne","umap") + genenames <- list(geneset2 <- c("badgene","Cd8a"),gene1 = c("Cd8a","Cd4")) + for(g in genenames){ + for(m in markertypes){ + for(r in reductions){ + print(m) + duallabel.result <- DualLabeling(object = obj, + samples <- c("1_E13","2_E15","3_Newborn","4_Adult"), + marker1 <- g[1], + marker_1_type = m, + marker2 <- g[2], + marker_2_type = m, + data_reduction = r, + density_heatmap = TRUE, + display_unscaled_values = TRUE) + ggsave(file=paste0(g[1],"_",g[2],"_",m,"_",r,"_duallabel.pdf"), + duallabel.result$plot) + } + } + } + + expected.elements = c("so","plot") + expect_setequal(names(duallabel.result), expected.elements) + +}) + \ No newline at end of file diff --git a/tests/testthat/test-Heatmap.R b/tests/testthat/test-Heatmap.R new file mode 100644 index 0000000..7b26150 --- /dev/null +++ b/tests/testthat/test-Heatmap.R @@ -0,0 +1,22 @@ +test_that("Produce heatmap and return filtered dataframe", { + + seurat_object <- readRDS("/rstudio-files/ccbr-data/users/maggie/SCWorkflow/tests/testthat/fixtures/SO_moduleScore.rds") + sample_names <- c("1_E13","2_E15","3_Newborn","4_Adult") + metadata_to_plot <- c("orig_ident","Likely_CellType") + transcripts_to_plot = c("Epcam","Aire","Fezf2","Pigr","Ly6d","Spink5","Ivl","Krt10","Gapdh","Cd8a","Foxp3","Cd4") + proteins_to_plot = c("") + plot_title <- "Heatmap_IO_test" + trim_outliers_percentage <- 0.01 + + heatplot <- Heatmap(object = seurat_object, + sample.names = sample_names, + metadata = metadata_to_plot, + transcripts = transcripts_to_plot, + proteins = NULL, + trim.outliers.percentage = trim_outliers_percentage, + plot.title = "Heatmap") + print(heatplot$data[1:5,1:5]) + expected.elements = c("plot","data") + expect_setequal(names(heatplot), expected.elements) +}) + \ No newline at end of file