-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
9f16588
commit 08532bf
Showing
15 changed files
with
1,658 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
^.*\.Rproj$ | ||
^\.Rproj\.user$ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,3 +2,4 @@ | |
.nfs* | ||
*.png | ||
Rcheck.txt | ||
.Rproj.user |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,233 @@ | ||
# This code comes from NIDAP 'Color by Genes Automatic [scRNA-seq][CCBR]' code template | ||
# Template Manager https://nidap.nih.gov/workspace/vector/templates/ri.vector.main.template.d71ed4e6-a25d-4f66-a186-27c00a50a703 | ||
# Documentation https://nidap.nih.gov/workspace/notepad/view/ri.notepad.main.notepad.8101be48-85a5-40ec-8fa8-9fe28e5d31cb | ||
|
||
#' @title Create gene expression annotated dimension plots based on user inputted table | ||
#' @description Returns a panel of dimension plots colored by individual gene expression | ||
#' @details Takes in a list of genes inputted by the user, displays gene expression information on tsne, umap, or pca | ||
#' | ||
#' @param SO Seurat-class object | ||
#' @param samples_to_include List of samples to subset the data by | ||
#' @param samples_to_display List of samples to depict on dimension plot, samples not in the list would be colored gray in the background | ||
#' @param marker_list Table of marker genes for each celltype (column names of the table), append "_prot" or "_neg" for proteins or negative markers | ||
#' @param cells_of_interest Vector of celltypes from geneset_dataframe to screen for | ||
#' @param protein_presence Set to TRUE if there are CITE-seq markers in geneset_dataframe | ||
#' @param assay Name of the assay to extract gene expression data from | ||
#' @param reduction_type Choose among tsne, umap, and pca | ||
#' @param point_transparency Set to lower value for more see through points on dimension plot | ||
#' @param point_shape Change the shape of points for between visualization | ||
#' @param number_of_rows Set the number of rows to be displayed on the compiled gene expression dimension plot | ||
#' @param doCiteSeq Set to TRUE if using Cite-seq data | ||
#' | ||
#' @import Seurat | ||
#' @import tidyverse | ||
#' @import gridExtra | ||
#' @import ggpubr | ||
#' @import ggplot2 | ||
#' | ||
#' @export | ||
|
||
#' @return compiled dimension plots of markers, in the same layout as the user-inputted marker table | ||
|
||
color_by_genes <- function(SO, | ||
samples_to_include, | ||
samples_to_display, | ||
marker_list, | ||
cells_of_interest, | ||
protein_presence = FALSE, | ||
assay = "SCT", | ||
reduction_type = "umap", | ||
point_transparency = 0.5, | ||
point_shape = 16, | ||
number_of_rows = 0, | ||
doCiteSeq = FALSE){ | ||
|
||
## --------- ## | ||
## Functions ## | ||
## --------- ## | ||
|
||
plotMarkers <- function(Markers){ | ||
if (is.na(Markers) == TRUE) { | ||
g <- ggplot() + theme_void() | ||
return(g) | ||
} else { | ||
Markers.mat=SO.sub[[assay]]@scale.data[Markers,] | ||
Markers.quant=quantile(Markers.mat[Markers.mat>1],probs=c(.1,.5,.90)) | ||
Markers.mat[Markers.mat>Markers.quant[3]]=Markers.quant[3] | ||
Markers.mat[Markers.mat<Markers.quant[1]]=0 | ||
|
||
if (!(doCiteSeq)) { | ||
if(reduction_type == "tsne"){ | ||
p1 <- DimPlot(SO.sub, reduction = "tsne", group.by = "ident") | ||
clusmat=data.frame(umap1=p1$data$tSNE_1,umap2=p1$data$tSNE_2, Markers=Markers.mat, ident = as.factor(p1$data$ident)) | ||
} | ||
else if(reduction_type == "umap"){ | ||
p1 <- DimPlot(SO.sub, reduction = "umap", group.by = "ident") | ||
clusmat=data.frame(umap1=p1$data$UMAP_1,umap2=p1$data$UMAP_2, Markers=Markers.mat,ident = as.factor(p1$data$ident)) | ||
} | ||
else{ | ||
p1 <- DimPlot(SO.sub, reduction = "pca", group.by = "ident") | ||
clusmat=data.frame(umap1=p1$data$PC_1,umap2=p1$data$PC_2, Markers=Markers.mat,ident = as.factor(p1$data$ident)) | ||
} #if CITEseq is chosen then: | ||
} else { | ||
if(reduction_type == "tsne"){ | ||
p1 <- DimPlot(SO.sub, reduction = "protein_tsne", group.by = "ident") | ||
clusmat=data.frame(umap1=p1$data$protein_tsne_1,umap2=p1$data$protein_tsne_2, Markers=Markers.mat,ident = as.factor(p1$data$ident)) | ||
} | ||
else if(reduction_type == "umap"){ | ||
p1 <- DimPlot(SO.sub, reduction = "protein_umap", group.by = "ident") | ||
clusmat=data.frame(umap1=p1$data$protein_umap_1,umap2=p1$data$protein_umap_2, Markers=Markers.mat,ident = as.factor(p1$data$ident)) | ||
} | ||
else{ | ||
p1 <- DimPlot(SO.sub, reduction = "protein_pca", group.by = "ident") | ||
clusmat=data.frame(umap1=p1$data$protein_pca_1,umap2=p1$data$protein_pca_2, Markers=Markers.mat,ident = as.factor(p1$data$ident)) | ||
} | ||
} | ||
|
||
# Samples caption | ||
samples_displayed_message <- paste(samples_to_display, sep = "", collapse = "\n") | ||
final_caption <- paste("Samples Displayed: ", samples_displayed_message, sep = "", collapse = "\n") | ||
|
||
clusmat <- mutate(clusmat, sample_Markers = clusmat$Markers * grepl(paste(samples_to_display, collapse = "|"), clusmat$ident)) | ||
|
||
clusmat %>% dplyr::arrange(sample_Markers) -> clusmat | ||
if (grepl("_neg",Markers) == TRUE){ | ||
|
||
clusmat %>% dplyr::arrange(desc(sample_Markers)) -> clusmat | ||
g <- ggplot(clusmat, aes(x=umap1, y=umap2, group=ident)) + | ||
theme_bw() + | ||
theme(legend.title=element_blank()) + | ||
ggtitle(Markers) + | ||
geom_point(aes(color=sample_Markers, shape=ident),alpha=point_transparency,shape=point_shape, size=1) + | ||
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), | ||
panel.background = element_blank(),legend.text=element_text(size=rel(0.5)) )+ | ||
scale_color_gradient(limits = c(0, Markers.quant[3]),low = "lightgrey", high = "red") + | ||
xlab("umap-1") + ylab("umap-2") | ||
return(g) | ||
} else { | ||
clusmat %>% dplyr::arrange(sample_Markers) -> clusmat | ||
g <- ggplot(clusmat, aes(x=umap1, y=umap2, group=ident)) + | ||
theme_bw() + | ||
theme(legend.title=element_blank()) + | ||
ggtitle(Markers) + | ||
geom_point(aes(color=sample_Markers, shape=ident),alpha=point_transparency,shape=point_shape, size=1) + | ||
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), | ||
panel.background = element_blank(),legend.text=element_text(size=rel(0.5)) )+ | ||
scale_color_gradient(limits = c(0, Markers.quant[3]),low = "lightgrey", high = "red") + | ||
xlab("umap-1") + ylab("umap-2") | ||
return(g)} | ||
} | ||
} | ||
|
||
## --------------- ## | ||
## Main Code Block ## | ||
## --------------- ## | ||
|
||
if (length(samples_to_include) == 0) { | ||
samples_to_include = unique(SO@meta.data$sample_name) | ||
} | ||
|
||
if("active.ident" %in% slotNames(SO)){ | ||
sample_name = as.factor(SO@meta.data$orig.ident) | ||
names(sample_name)=names(SO@active.ident) | ||
SO@active.ident <- as.factor(vector()) | ||
SO@active.ident <- sample_name | ||
SO.sub = subset(SO, ident = samples_to_include) | ||
} else { | ||
sample_name = as.factor(SO@meta.data$orig.ident) | ||
names(sample_name)=names(SO@active.ident) | ||
SO@active.ident <- as.factor(vector()) | ||
SO@active.ident <- sample_name | ||
SO.sub = subset(SO, ident = samples_to_include) | ||
} | ||
|
||
marker_list <- marker_list[cells_of_interest] | ||
|
||
# Remove columns with all missing values | ||
Present_Markers_ls <- list() | ||
|
||
for (celltype in colnames(marker_list)) { | ||
print(names(marker_list[celltype])) | ||
present=lapply(marker_list[[celltype]], function(x) x %in% rownames(SO.sub$SCT@scale.data)) | ||
absentgenes = unlist(marker_list[[celltype]])[present==FALSE] | ||
presentgenes = unlist(marker_list[[celltype]])[present==TRUE] | ||
print(paste0("Genes not present: ",paste0(absentgenes,collapse=","))) | ||
print(paste0("Genes present: ",paste0(presentgenes,collapse=","))) | ||
|
||
if(length(presentgenes) == 0){ | ||
print(paste0(names(marker_list[celltype]), " genes were not found in SO and will not be analyzed")) | ||
} else {Present_Markers_ls[[celltype]] <- presentgenes} | ||
} | ||
|
||
# Padd processed list containing only the present genes | ||
padded_list <- lapply(Present_Markers_ls, `length<-`, max(lengths(Present_Markers_ls))) | ||
Markers_from_list <- as.data.frame.matrix(do.call(cbind, padded_list)) | ||
|
||
# Recognize any markers designated as proteins and insert protein expression data into slot where plots are created | ||
if (protein_presence){ | ||
protein_markers <- Markers_from_list[grepl("_prot",Markers_from_list)] | ||
|
||
protein_orig_markers <- gsub("_prot.*","",protein_markers) | ||
|
||
protein_markers_name <- paste(protein_orig_markers, | ||
"_prot", sep = "") | ||
|
||
i = 0 | ||
protein_array <- list() | ||
for (i in seq_along(protein_orig_markers)){ | ||
protein_array[[i]] <- SO.sub@assays$Protein[protein_orig_markers[i],] | ||
rownames(protein_array[[i]]) <- protein_markers_name[i] | ||
} | ||
protein_array_comp <- do.call(rbind,protein_array) | ||
SO.sub@assays$SCT@scale.data <- rbind(SO.sub@assays$SCT@scale.data,protein_array_comp) | ||
} | ||
|
||
# Add negative/low identifiers | ||
neg_markers_names <- Markers_from_list[grepl("_neg",Markers_from_list)] | ||
orig_markers <- gsub("_neg.*","",neg_markers_names) | ||
|
||
# Append neg_markers_names to rownames of SO.sub | ||
neg_markers_list <- list() | ||
|
||
# Calculate adjusted expression for negative markers | ||
for (i in seq_along(orig_markers)){ | ||
if (orig_markers[i] %in% rownames(SO.sub@assays$SCT@scale.data)){ | ||
# Format the data so that it can rbinded with [email protected] | ||
neg_markers_list[[i]] <- t(matrix(max(SO.sub@assays$SCT@scale.data[orig_markers[i],]) - SO.sub@assays$SCT@scale.data[orig_markers[i],])) | ||
row.names(neg_markers_list[[i]]) <- neg_markers_names[i] | ||
colnames(neg_markers_list[[i]]) <- colnames(SO.sub@assays$SCT@scale.data) | ||
|
||
# Append new Negative/low marker (w Expression Count) to SO slot | ||
SO.sub@assays$SCT@scale.data <- rbind(SO.sub@assays$SCT@scale.data, neg_markers_list[[i]]) | ||
} else { | ||
print(paste(orig_markers[i], " is not found in Seurat, cannot calculate negative expression", sep = "")) | ||
}} | ||
|
||
Markers_present = unlist(Markers_from_list) | ||
|
||
if(!length(Markers_present)>0){ | ||
print("No Markers found in dataset") | ||
return(NULL) | ||
} | ||
|
||
# Create list for storing color by gene plots of each celltype column | ||
gg_storage <- list() | ||
|
||
for (cell in colnames(Markers_from_list)){ | ||
|
||
title <- cell | ||
|
||
markers_to_analyze <- as.character(Markers_from_list[[cell]]) | ||
|
||
grob <- lapply(markers_to_analyze,function(x) plotMarkers(x)) | ||
gg_storage[[cell]] <- gridExtra::arrangeGrob(grobs=grob,ncol = 1,newpage=F, as.table = F, top = text_grob(title,size = 15, face = "bold")) | ||
|
||
} | ||
|
||
#pdf("Color_by_Genes.pdf", 12, 15) | ||
final_figures <- do.call(grid.arrange, c(gg_storage, ncol = ncol(Markers_from_list))) | ||
#dev.off() | ||
|
||
return(final_figures) | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,141 @@ | ||
# This code comes from NIDAP 'Harmony Batch Correction [scRNA-seq][CCBR]' code template | ||
# Template Manager https://nidap.nih.gov/workspace/vector/templates/ri.vector.main.template.a097334e-21ac-466c-b467-c761b2c25f02 | ||
# Documentation https://nidap.nih.gov/workspace/notepad/view/ri.notepad.main.notepad.facd9ed0-90fc-4c55-bbb8-0153f06858d4 | ||
|
||
#' @title Performs batch correction on Seurat-class object | ||
#' @description Returns a Seurat-class object with adjusted cell embeddings and gene expression data to account for variations in sample collection | ||
#' @details Takes in a list of genes inputted by the user, displays gene expression information in particular slot-assay with (optional) outliers removed | ||
#' | ||
#' @param seurat_object Seurat-class object | ||
#' @param variable_features Number of most variable genes to subset the gene expression data by | ||
#' @param genes_to_add Add genes that might not be found among variably expressed genes | ||
#' @param group.by.vars which variable should be accountted for when running batch correction | ||
|
||
#' @import Seurat | ||
#' @import harmony | ||
#' @import gridExtra | ||
#' @import RColorBrewer | ||
#' @import ggplot2 | ||
#' | ||
#' @export | ||
|
||
#' @return Seurat-class Object with Harmony-adjusted gene expression (SCT slot) and tSNE cell embedding | ||
|
||
|
||
harmony_batch_correct <- function(so, | ||
variable_features = 2000, | ||
genes_to_add, | ||
group.by.vars) { | ||
|
||
## --------------- ## | ||
## Main Code Block ## | ||
## --------------- ## | ||
|
||
seur.SCT <- so@assays$SCT@scale.data | ||
seur.SCT[1:10, 1:10] | ||
|
||
genes_of_interest <- genes_to_add[genes_to_add %in% rownames(so@assays$SCT@scale.data)] | ||
|
||
# Add more genes to analyze | ||
VariableFeatures(so) <- c(VariableFeatures(so), genes_of_interest) | ||
mvf <- VariableFeatures(so)[1:(variable_features + length(genes_of_interest))] | ||
print(dim(so@assays$SCT@scale.data)) | ||
seur.SCT <- seur.SCT[mvf,] | ||
seur.SCT <- t(seur.SCT) | ||
# need the original loadings and embeddings to compare with my SVD | ||
seur.loads <- so@reductions$pca@feature.loadings | ||
seur.pca <- so@reductions$pca@cell.embeddings | ||
sm.pca <- seur.pca[1:10, 1:10] | ||
sm.seur.ldngs <- seur.loads[1:10, 1:10] | ||
|
||
# SVD on scaled counts | ||
|
||
variable_features <- length(mvf) | ||
Sys.time() | ||
pppca <- svd(seur.SCT) | ||
Sys.time() | ||
ppembed <- pppca$u %*% diag(pppca$d) | ||
pcnames <- vector(mode = "character") | ||
for (i in 1:dim(ppembed)[2])pcnames[i] <- paste("PC", i, sep = "_") | ||
colnames(ppembed) <- pcnames | ||
rownames(ppembed) <- rownames(seur.SCT) | ||
sm.ppembed <- ppembed[1:10, 1:10] | ||
ppldngs <- pppca$v | ||
colnames(ppldngs) <- pcnames | ||
rownames(ppldngs) <- mvf | ||
sm.ppldngs <- ppldngs[1:10, 1:10] | ||
ppembed[1:10, 1:10] | ||
ppldngs[1:10, 1:10] | ||
|
||
so@reductions$pca@cell.embeddings <- ppembed | ||
so@reductions$pca@feature.loadings <- ppldngs | ||
so@reductions$pca@stdev <- pppca$d | ||
print(Sys.time()) | ||
|
||
so <- RunHarmony(so, group.by.vars, | ||
do_pca=FALSE, | ||
assay.use = "SCT", | ||
plot_convergence = TRUE, | ||
return_object=TRUE) | ||
|
||
head(so@reductions$harmony@cell.embeddings) | ||
head(so@reductions$harmony@feature.loadings) | ||
|
||
so <- RunUMAP(so, reduction = "harmony",dims=1:4) | ||
so <- RunTSNE(so, reduction = "harmony",dims=1:4) | ||
|
||
sdat <- data.frame(as.vector(so@reductions$tsne@cell.embeddings[,1]), | ||
as.vector(so@reductions$tsne@cell.embeddings[,2]), | ||
so@meta.data[eval(parse(text = "group.by.vars"))]) | ||
names(sdat) <- c("TSNE1","TSNE2","ident") | ||
|
||
n <- 2e3 | ||
set.seed(10) | ||
ourColorSpace <- colorspace::RGB(runif(n), runif(n), runif(n)) | ||
ourColorSpace <- as(ourColorSpace, "LAB") | ||
|
||
|
||
distinctColorPalette <-function(k=1) { | ||
currentColorSpace <- ourColorSpace@coords | ||
# Set iter.max to 20 to avoid convergence warnings. | ||
set.seed(1) | ||
km <- kmeans(currentColorSpace, k, iter.max=20) | ||
colors <- unname(hex(LAB(km$centers))) | ||
return(colors) | ||
} | ||
|
||
n <- 60 | ||
qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',] | ||
qual_col_pals = qual_col_pals[c(7,6,2,1,8,3,4,5),] | ||
cols = unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals))) | ||
|
||
g1 <- ggplot(sdat, aes(x=TSNE1, y=TSNE2)) + | ||
theme_bw() + | ||
theme(legend.title=element_blank()) + | ||
geom_point(aes(colour=ident),size=1) + | ||
scale_color_manual(values=cols) + | ||
theme(panel.grid.major = element_blank(), | ||
panel.grid.minor = element_blank(), | ||
legend.position="top", | ||
panel.background = element_blank(), | ||
legend.text=element_text(size=rel(0.5))) + | ||
guides(colour = guide_legend(override.aes = list(size=5, alpha = 1))) | ||
|
||
solist <- list(so, ppldngs) | ||
|
||
# Calculate adjusted gene expression from embeddings | ||
so <- solist[[1]] | ||
ppldngs <- solist[[2]] | ||
seur.SCT <- t(so@assays$SCT@scale.data) | ||
harm.embeds <- so@reductions$harmony@cell.embeddings | ||
#print(dim(harm.embeds)) | ||
harm.lvl.backcalc <- harm.embeds %*% t(ppldngs) | ||
|
||
sm.harmexpr <- harm.lvl.backcalc[1:10, 1:4] | ||
sm.comp.scld <- (seur.SCT[rownames(sm.harmexpr), colnames(sm.harmexpr)]) | ||
|
||
# Convert to ggplot that includes color by samples for higher resolution of batch correction, use for loop to screen for different genes | ||
so@assays$SCT@scale.data <- t(harm.lvl.backcalc) | ||
|
||
return(so) | ||
} |
Oops, something went wrong.