Skip to content

Commit

Permalink
Merge pull request #115 from FertigLab/bioc_coding_practice_notes
Browse files Browse the repository at this point in the history
Bioc coding practice notes
  • Loading branch information
dimalvovs authored Apr 24, 2024
2 parents a8ceabb + 7f03e90 commit 289eb0a
Show file tree
Hide file tree
Showing 11 changed files with 122 additions and 65 deletions.
2 changes: 2 additions & 0 deletions R/class_definitions.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ NULL
#' @name domino-class
#' @rdname domino-class
#' @exportClass domino
#' @return an instance of class `domino `
#'
domino <- methods::setClass(
Class = "domino",
Expand Down Expand Up @@ -58,6 +59,7 @@ domino <- methods::setClass(
#' @name linkage_summary-class
#' @rdname linkage_summary-class
#' @exportClass linkage_summary
#' @return an instance of class `linkage_summary`
#'
linkage_summary <- setClass(
Class = "linkage_summary",
Expand Down
54 changes: 29 additions & 25 deletions R/differential_fxns.R
Original file line number Diff line number Diff line change
Expand Up @@ -115,9 +115,10 @@ count_linkage <- function(linkage_summary, cluster, group.by = NULL, linkage = "
if (is.null(subject_names)) {
subject_names <- linkage_summary@subject_names
}
all_int <- sapply(linkage_summary@subject_linkages, FUN = function(x) {
all_int_ls <- lapply(linkage_summary@subject_linkages, FUN = function(x) {
return(x[[cluster]][[linkage]])
})
all_int <- unlist(all_int_ls)
feature <- table(unlist(all_int))
df <- data.frame(feature = names(feature), total_count = as.numeric(feature))
if (!is.null(group.by)) {
Expand All @@ -130,7 +131,7 @@ count_linkage <- function(linkage_summary, cluster, group.by = NULL, linkage = "
g_subjects <- linkage_summary@subject_meta[g_index, 1]
int_count <- list()
for (f in df[["feature"]]) {
count <- sapply(g_subjects, function(x) {
count <- vapply(g_subjects, FUN.VALUE = logical(1), FUN = function(x) {
f %in% linkage_summary@subject_linkages[[x]][[cluster]][[linkage]]
})
int_count[[f]] <- sum(count)
Expand Down Expand Up @@ -198,36 +199,39 @@ test_differential_linkages <- function(linkage_summary, cluster, group.by, linka
colnames(test_mat) <- c("linkage_present", "linkage_absent")
test_template <- as.data.frame(test_mat)
if (test_name == "fishers.exact") {
test_result <- as.data.frame(t(sapply(result_df[["feature"]], FUN = function(x) {
feat_count <- count_link[count_link[["feature"]] == x, !colnames(count_link) %in% c("feature",
"total_count")]
feat_count <- sapply(feat_count, as.numeric)
# fill contingency table
test_df <- test_template
test_df[["linkage_present"]] <- feat_count
test_df[["linkage_absent"]] <- subject_count[["total"]] - feat_count
# conduct test
test <- fisher.test(test_df)
odds.ratio <- test$estimate
p.value <- test$p.value
res <- c(odds.ratio, p.value)
res <- setNames(res, c("odds.ratio", "p.value"))
return(res)
})))
test_result <- as.data.frame(t(vapply(
result_df[["feature"]], FUN.VALUE = numeric(2), FUN = function(x) {
feat_count <- count_link[count_link[["feature"]] == x, !colnames(count_link) %in% c("feature",
"total_count")]
feat_count <- vapply(feat_count, FUN.VALUE = numeric(1), FUN = as.numeric)
# fill contingency table
test_df <- test_template
test_df[["linkage_present"]] <- feat_count
test_df[["linkage_absent"]] <- subject_count[["total"]] - feat_count
# conduct test
test <- fisher.test(test_df)
odds.ratio <- test$estimate
p.value <- test$p.value
res <- c(odds.ratio, p.value)
res <- setNames(res, c("odds.ratio", "p.value"))
return(res)
})
))
# include fdr-adjusted p-values
test_result[["p.adj"]] <- p.adjust(p = test_result[["p.value"]], method = "fdr")
}
# append test result columns to result
result_df <- cbind(result_df, test_result)
# append counts of active linkages for each group
count_append <- count_link[, !colnames(count_link) == "feature"]
colnames(count_append) <- sapply(colnames(count_append), function(x) {
if (!grepl("_count$", x)) {
return(paste0(x, "_count"))
} else {
return(x)
}
})
colnames(count_append) <- vapply(
colnames(count_append), FUN.VALUE = character(1), FUN = function(x) {
if (!grepl("_count$", x)) {
return(paste0(x, "_count"))
} else {
return(x)
}
})
result_df <- cbind(result_df, count_append)
# append total counts of subjects in each group and the total number of subjects
total_n <- sum(subject_count[["total"]])
Expand Down
52 changes: 38 additions & 14 deletions R/import_fxns.R
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ create_rl_map_cellphonedb <- function(
}
# Step through the interactions and build rl connections.
rl_map <- NULL
for (i in 1:nrow(interactions)) {
for (i in seq_len(nrow(interactions))) {
inter <- interactions[i, ]
partner_a <- inter[["partner_a"]]
partner_b <- inter[["partner_b"]]
Expand All @@ -87,7 +87,7 @@ create_rl_map_cellphonedb <- function(
component_a <- as.character(complex_a[, c("uniprot_1", "uniprot_2", "uniprot_3", "uniprot_4")])
component_a <- component_a[component_a != ""]
a_features[["uniprot_A"]] <- paste(component_a, collapse = ",")
gene_a <- sapply(component_a, function(x) {
gene_a <- vapply(component_a, FUN.VALUE = character(1), FUN = function(x) {
g <- unique(genes[genes[["uniprot"]] == x, c("gene_name")])
if (!is.null(gene_conv) & !identical(gene_conv[1], gene_conv[2])) {
# if the original gene trying to be converted is not in the gene dictionary the
Expand All @@ -100,7 +100,19 @@ create_rl_map_cellphonedb <- function(
g <- paste(unique(conv_dict[conv_dict[, 1] %in% g, 2]), collapse = ";")
}
}
return(g)
# if multiple genes are annotated for the uniprot ID, use only the first unique instance
if(length(g) == 1){
res <- g
} else {
res <- g[1]
g_col <- paste(g, collapse = ", ")
message(
component_a, " has multiple encoding gene mapped in genes table.\n",
g_col, "\n",
"The first mapping gene is used: ", res
)
}
return(res)
})
a_features[["gene_A"]] <- paste(gene_a, collapse = ",")
# annotation as a receptor or ligand is based on the annotation of the complex
Expand Down Expand Up @@ -143,7 +155,7 @@ create_rl_map_cellphonedb <- function(
component_b <- as.character(complex_b[, c("uniprot_1", "uniprot_2", "uniprot_3", "uniprot_4")])
component_b <- component_b[component_b != ""]
b_features[["uniprot_B"]] <- paste(component_b, collapse = ",")
gene_b <- sapply(component_b, function(x) {
gene_b <- vapply(component_b, FUN.VALUE = character(1), FUN = function(x) {
g <- unique(genes[genes[["uniprot"]] == x, c("gene_name")])
if (!is.null(gene_conv) & !identical(gene_conv[1], gene_conv[2])) {
# if the original gene trying to be converted is not in the gene dictionary the
Expand All @@ -156,7 +168,19 @@ create_rl_map_cellphonedb <- function(
g <- paste(unique(conv_dict[conv_dict[, 1] %in% g, 2]), collapse = ";")
}
}
return(g)
# if multiple genes are annotated for the uniprot ID, use only the first unique instance
if(length(g) == 1){
res <- g
} else {
res <- g[1]
g_col <- paste(g, collapse = ", ")
message(
component_a, " has multiple encoding gene mapped in genes table.\n",
g_col, "\n",
"The first mapping gene is used: ", res
)
}
return(res)
})
b_features[["gene_B"]] <- paste(gene_b, collapse = ",")
# annotation as a receptor or ligand is based on the annotation of the complex
Expand Down Expand Up @@ -325,7 +349,7 @@ create_domino <- function(
if ("database_name" %in% colnames(rl_map)) {
dom@db_info <- rl_map
if (verbose) {
message(paste0("Database provided from source: ", unique(rl_map[["database_name"]])))
message("Database provided from source: ", unique(rl_map[["database_name"]]))
}
} else {
dom@db_info <- rl_map
Expand All @@ -338,7 +362,7 @@ create_domino <- function(
}
# Get genes for receptors
rl_reading <- NULL
for (i in 1:nrow(rl_map)) {
for (i in seq_len(nrow(rl_map))) {
rl <- list()
inter <- rl_map[i, ]
p <- ifelse(inter[["type_A"]] == "R", "A", "B")
Expand All @@ -362,11 +386,12 @@ create_domino <- function(
rl <- as.data.frame(rl)
rl_reading <- rbind(rl_reading, rl)
}
if(nrow(rl_reading) == 0) stop("No genes annotated as receptors included in rl_map")
# save a list of complexes and their components
dom@linkages$complexes <- NULL
if (use_complexes) {
complex_list <- list()
for (i in 1:nrow(rl_reading)) {
for (i in seq_len(nrow(rl_reading))) {
inter <- rl_reading[i, ]
if (grepl("\\,", inter[["L.gene"]])) {
complex_list[[inter[["L.name"]]]] <- unlist(strsplit(inter[["L.gene"]], split = "\\,"))
Expand Down Expand Up @@ -415,7 +440,7 @@ create_domino <- function(
for (clust in levels(dom@clusters)) {
if (verbose) {
cur <- which(levels(dom@clusters) == clust)
message(paste0(cur, " of ", clust_n))
message(cur, " of ", clust_n)
}
cells <- which(dom@clusters == clust)
for (feat in rownames(dom@features)) {
Expand Down Expand Up @@ -463,7 +488,7 @@ create_domino <- function(
# correlation equal to 0.
if (verbose) {
cur <- which(rownames(dom@features) == module)
message(paste0(cur, " of ", n_tf))
message(cur, " of ", n_tf)
}
if (!is.null(dom@linkages$tf_targets)) {
tf <- gsub(pattern = "\\.\\.\\.", replacement = "", module) # correction for AUC values from pySCENIC that append an elipses to TF names due to (+) characters in the orignial python output
Expand Down Expand Up @@ -502,15 +527,14 @@ create_domino <- function(
dom@misc$rec_cor <- rho
# assess correlation among genes in the same receptor complex
cor_list <- list()
for (i in 1:length(names(dom@linkages$rec_lig))) {
for (i in seq_along(names(dom@linkages$rec_lig))) {
r <- names(dom@linkages$rec_lig)[i]
if (r %in% names(dom@linkages$complexes)) {
r_genes <- dom@linkages$complexes[[r]]
} else {
r_genes <- r
}
if (sum(rownames(rho) %in% r_genes) != length(r_genes)) {
message(paste0(r, " has component genes that did not pass testing parameters"))
cor_list[[r]] <- rep(0, ncol(rho))
next
}
Expand All @@ -531,7 +555,7 @@ create_domino <- function(
if (tf_selection_method == "clusters") {
cl_rec_percent <- NULL
for (rec in ser_receptors) {
rec_percent <- sapply(X = levels(dom@clusters), FUN = function(x) {
rec_percent <- vapply(X = levels(dom@clusters), FUN.VALUE = numeric(1), FUN = function(x) {
# percentage of cells in cluster with non-zero expression of receptor gene
sum(dom@counts[rec, dom@clusters == x] > 0) / length(dom@counts[rec, dom@clusters ==
x])
Expand Down Expand Up @@ -624,7 +648,7 @@ add_rl_column <- function(map, map_ref, conv, new_name) {
not_in_ref_map <- c()
}
new_map <- c()
for (r_id in 1:nrow(map)) {
for (r_id in seq_len(nrow(map))) {
row <- map[r_id, ]
conv_ids <- which(conv[, 1] == row[[map_ref]])
for (id in conv_ids) {
Expand Down
32 changes: 16 additions & 16 deletions R/plot_fxns.R
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ signaling_network <- function(
}
# Get vert angle for labeling circos plot
if (layout == "circle") {
v_angles <- 1:length(igraph::V(graph))
v_angles <- seq(length(igraph::V(graph)))
v_angles <- -2 * pi * (v_angles - 1) / length(v_angles)
igraph::V(graph)$label.degree <- v_angles
}
Expand Down Expand Up @@ -365,7 +365,7 @@ gene_network <- function(dom, clust = NULL, OutgoingSignalingClust = NULL,
# Check if signaling exists for target cluster
mat <- dom@cl_signaling_matrices[[cl]]
if (dim(mat)[1] == 0) {
message(paste("No signaling found for", cl, "under build parameters."))
message("No signaling found for ", cl, " under build parameters.")
(next)()
}
all_sums <- c(all_sums, rowSums(mat))
Expand Down Expand Up @@ -465,9 +465,9 @@ gene_network <- function(dom, clust = NULL, OutgoingSignalingClust = NULL,
l[all_ligs, 1] <- -0.75
l[all_recs, 1] <- 0
l[all_tfs, 1] <- 0.75
l[all_ligs, 2] <- (1:length(all_ligs) / mean(1:length(all_ligs)) - 1) * 2
l[all_recs, 2] <- (1:length(all_recs) / mean(1:length(all_recs)) - 1) * 2
l[all_tfs, 2] <- (1:length(all_tfs) / mean(1:length(all_tfs)) - 1) * 2
l[all_ligs, 2] <- (seq_along(all_ligs) / mean(seq_along(all_ligs)) - 1) * 2
l[all_recs, 2] <- (seq_along(all_recs) / mean(seq_along(all_recs)) - 1) * 2
l[all_tfs, 2] <- (seq_along(all_tfs) / mean(seq_along(all_tfs)) - 1) * 2
rownames(l) <- c()
} else if (layout == "random") {
l <- igraph::layout_randomly(graph)
Expand Down Expand Up @@ -553,7 +553,7 @@ feat_heatmap <- function(
na <- which(is.na(mid))
na_feats <- paste(feats[na], collapse = " ")
if (length(na) != 0) {
message(paste("Unable to find", na_feats))
message("Unable to find ", na_feats)
feats <- feats[-na]
}
} else if (feats == "all") {
Expand Down Expand Up @@ -659,7 +659,7 @@ cor_heatmap <- function(
na <- which(is.na(mid))
na_feats <- paste(feats[na], collapse = " ")
if (length(na) != 0) {
message(paste("Unable to find", na_feats))
message("Unable to find ", na_feats)
feats <- feats[-na]
}
} else if (identical(feats, "all")) {
Expand Down Expand Up @@ -756,7 +756,7 @@ cor_scatter <- function(dom, tf, rec, remove_rec_dropout = TRUE, ...) {
#' circos_ligand_receptor(dominoSignal:::pbmc_dom_built_tiny, receptor = "CXCR3")
#' #specify colors
#' cols = c("red", "orange", "green", "blue", "pink", "purple", "slategrey", "firebrick", "hotpink")
#' names(cols) = levels(dominoSignal:::pbmc_dom_built_tiny@clusters)
#' names(cols) = levels(dom_clusters(dominoSignal:::pbmc_dom_built_tiny))
#' circos_ligand_receptor(dominoSignal:::pbmc_dom_built_tiny, receptor = "CXCR3", cell_colors = cols)
#'
circos_ligand_receptor <- function(
Expand All @@ -769,7 +769,7 @@ circos_ligand_receptor <- function(
cell_idents <- sort(unique(dom@clusters))
}
# obtain expression values from cl_signaling matrices
active_chk <- sapply(dom@linkages$clust_rec, function(x) {
active_chk <- vapply(dom@linkages$clust_rec, FUN.VALUE = logical(1), FUN = function(x) {
receptor %in% x
})
if (sum(active_chk)) {
Expand All @@ -783,15 +783,15 @@ circos_ligand_receptor <- function(
signaling_df <- rbind(signaling_df, df)
}
} else {
stop(paste0("No clusters have active ", receptor, " signaling"))
stop("No clusters have active ", receptor, " signaling")
}
signaling_df$mean.expression[is.na(signaling_df$mean.expression)] <- 0
# create a scaled mean expression plot for coord widths greater than 1 by dividing by the max
# expression [range (0-1)] scaled.mean will only be used when the max expression is > 1
signaling_df$scaled.mean.expression <- signaling_df$mean.expression / max(signaling_df$mean.expression)
# exit function if no ligands are expressed above ligand expression threshold
if (sum(signaling_df[["mean.expression"]] > ligand_expression_threshold) == 0) {
stop(paste0("No ligands of ", receptor, " exceed ligand expression threshold."))
stop("No ligands of ", receptor, " exceed ligand expression threshold.")
}
# initialize chord diagram with even ligand arcs
arc_df <- signaling_df[, c("origin", "destination")]
Expand All @@ -814,7 +814,7 @@ circos_ligand_receptor <- function(
names(cell_colors) <- cell_idents
}
grid_col <- c("#FFFFFF") # hide the arc corresponding to the receptor by coloring white
for (i in 1:length(ligands)) {
for (i in seq_along(ligands)) {
grid_col <- c(grid_col, rep(lig_colors[i], length(cell_idents)))
}
names(grid_col) <- c(receptor, signaling_df$origin)
Expand Down Expand Up @@ -900,7 +900,7 @@ plot_differential_linkages <- function(
differential_linkages, test_statistic, stat_range = c(0, 1),
stat_ranking = c("ascending", "descending"), group_palette = NULL) {
if (!test_statistic %in% colnames(differential_linkages)) {
stop(paste0("test statistic '", test_statistic, "' not present in colnames(differential_linkages)"))
stop("test statistic '", test_statistic, "' not present in colnames(differential_linkages)")
}
if (identical(stat_ranking, c("ascending", "descending"))) {
warning("stat_ranking order not specified. Defaulting to ascending order")
Expand All @@ -913,7 +913,7 @@ plot_differential_linkages <- function(
df <- differential_linkages[differential_linkages[[test_statistic]] >= stat_range[1] & differential_linkages[[test_statistic]] <=
stat_range[2], ]
if (nrow(df) == 0) {
stop(paste0("No features with '", test_statistic, "' within stat_range"))
stop("No features with '", test_statistic, "' within stat_range")
}
# order df by plot statistic
if (stat_ranking == "ascending") {
Expand Down Expand Up @@ -955,7 +955,7 @@ plot_differential_linkages <- function(
group_palette <- ggplot_col_gen(length(g_names))
names(group_palette) <- g_names
}
for (i in 1:length(g_names)) {
for (i in seq_along(g_names)) {
g <- g_names[i]
g_count <- paste0(g, "_count")
g_n <- paste0(g, "_n")
Expand Down Expand Up @@ -1003,5 +1003,5 @@ do_norm <- function(mat, dir) {
#'
ggplot_col_gen <- function(n) {
hues <- seq(15, 375, length = n + 1)
return(grDevices::hcl(h = hues, l = 65, c = 100)[1:n])
return(grDevices::hcl(h = hues, l = 65, c = 100)[seq_len(n)])
}
Loading

0 comments on commit 289eb0a

Please sign in to comment.