-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
301 additions
and
17 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
Package: ENIGMA | ||
Type: Package | ||
Title: dEcoNvolutIon method based on reGularized MAtrix completion | ||
Version: 0.1.1 | ||
Version: 0.1.5 | ||
Author: Weixu Wang, Jun Yao | ||
Maintainer: Weixu Wang <[email protected]> | ||
Description: Improved estimation of cell type-specific gene expression through | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,209 @@ | ||
#' @title Perform CTS-DEG analysis | ||
#' | ||
#' @description Analysis the differential expression genes for each cell type | ||
#' | ||
#' @param object ENIGMA object | ||
#' @param FDR_control | ||
#' if use Wang et al., FDR controlled DEG analysis model. Default: TRUE | ||
#' | ||
#' @param FoldChange | ||
#' if output the FoldChange of each gene, if FALSE, then output the expression difference | ||
#' | ||
#' @param covariate | ||
#' The data.frame object contains the covariate information of each sample | ||
#' | ||
#' @return A list object contain the DEG results of each cell types | ||
#' | ||
#' | ||
#' @examples | ||
#' \dontrun{ | ||
#' DEG = FindCSE_DEG(object,y) | ||
#' DEG = FindCSE_DEG(object,y,covariate=covariate) | ||
#' head(DEG$celltype1) | ||
#' } | ||
#' | ||
#' @reference | ||
#' Wang J, Roeder K, Devlin B. Bayesian estimation of cell type–specific gene expression with prior derived from single-cell data[J]. Genome research, 2021, 31(10): 1807-1818. | ||
#' | ||
#' @export | ||
FindCSE_DEG <- function(object,y,FDR_control = TRUE,covariate=NULL,FoldChange = FALSE){ | ||
#### | ||
#identify the DEG of ENIGMA outputs require normalized profile | ||
if(is.null(object@result_CSE_normalized)){ | ||
stop('CTS-DEG analysis required normalized profile!') | ||
} | ||
#convert sce object into 3-order array | ||
Exp = sce2array(object) | ||
cellName = dimnames(Exp)[[3]] | ||
DEG_list = list() | ||
if(FDR_control){result <- DEG_test(Exp,y,covariate)}else{result <- DEG_test2(Exp,y,covariate)} | ||
ES_m <- EffectiveSize(Exp,y,FoldChange) | ||
for(i in cellName){ | ||
Tab_m <- cbind(ES_m[,i],result$pval[,i],result$qval[,i]) | ||
if(FoldChange){colnames(Tab_m) <- c("FoldChange","pvalue","qvalue")}else{ | ||
colnames(Tab_m) <- c("ExpressionDifference","pvalue","qvalue") | ||
} | ||
DEG_list[[i]] <- Tab_m | ||
} | ||
DEG_list | ||
} | ||
|
||
EffectiveSize <- function(X_array,y,FoldChange=FoldChange){ | ||
if(FoldChange){ | ||
### convert all profile into pseudo positive | ||
X_array[X_array<0] <- 0 | ||
ES_m <- NULL | ||
nCell = dim(X_array)[3] | ||
cellName = dimnames(X_array)[[3]] | ||
for(i in 1:nCell){ | ||
G = X_array[,,i] | ||
FC <- apply(G,1,FCcall,y=y) | ||
ES_m = cbind(ES_m,FC) | ||
} | ||
colnames(ES_m) <- cellName | ||
}else{ | ||
ES_m <- NULL | ||
nCell = dim(X_array)[3] | ||
cellName = dimnames(X_array)[[3]] | ||
for(i in 1:nCell){ | ||
G = X_array[,,i] | ||
FC <- apply(G,1,DEcall,y=y) | ||
ES_m = cbind(ES_m,FC) | ||
} | ||
colnames(ES_m) <- cellName | ||
} | ||
ES_m | ||
} | ||
|
||
FCcall <- function(g,y){ | ||
mean(g[y==1])/mean(g[y==0]) | ||
} | ||
|
||
DEcall <- function(g,y){ | ||
mean(g[y==1]) - mean(g[y==0]) | ||
} | ||
|
||
|
||
DEG_test <- function(X_array,y,covariate=NULL){ | ||
O = array(0, | ||
dim = c( dim(X_array)[1], | ||
dim(X_array)[3], | ||
dim(X_array)[2]), | ||
dimnames = list( dimnames(X_array)[[1]], | ||
dimnames(X_array)[[3]], | ||
dimnames(X_array)[[2]]) | ||
) | ||
for(i in 1:dim(X_array)[3]){ | ||
O[,i,] <- X_array[,,i] | ||
} | ||
###Using ANOVA+glm model to evaluate prediction performance | ||
result <- test(O,y,covariate) | ||
pval <- t(result$pval) | ||
qval <- t(result$qval) | ||
colnames(qval) <- colnames(pval) <- dimnames(X_array)[[3]] | ||
rownames(qval) <- rownames(pval) <- dimnames(X_array)[[1]] | ||
return(list(qval = qval, pval = pval)) | ||
} | ||
|
||
|
||
get_pval <- function (pval, cell_type, K) | ||
{ | ||
pval0 = rep(NA, K) | ||
names(pval0) = cell_type | ||
names = intersect(names(pval), cell_type) | ||
pval0[names] = pval[names] | ||
return(pval0) | ||
} | ||
|
||
pval2qval <- function (pval, A, y, covariate = NULL) | ||
{ | ||
ng = nrow(A) | ||
if (is.null(covariate)) | ||
pval1 = sapply(1:ng, function(g) try(summary(manova(t(A[g, | ||
, ]) ~ y))$stats[1, "Pr(>F)"], silent = T)) | ||
else pval1 = sapply(1:ng, function(g) try(summary(manova(t(A[g, | ||
, ]) ~ y + covariate))$stats[1, "Pr(>F)"], silent = T)) | ||
pval = pval[, !is.na(as.numeric(pval1))] | ||
pval1 = na.omit(as.numeric(pval1)) | ||
qval1 = p.adjust(pval1, "fdr") | ||
qval = pval | ||
K = ncol(A) | ||
for (i in 1:ncol(pval)) { | ||
qval[, i] = 1 | ||
if (min(pval[, i], na.rm = T) < 0.05/K) | ||
qval[, i][which.min(pval[, i])] = qval1[i] | ||
} | ||
return(qval) | ||
} | ||
|
||
|
||
test <- function (A, y, covariate = NULL) | ||
{ | ||
if (dim(A)[3] != length(y)) | ||
print("CSE estimates and y have different length") | ||
if (!is.null(covariate)) | ||
if (dim(A)[3] != nrow(covariate)) | ||
print("CSE estimates and covariate have different number of samples/subjects") | ||
else { | ||
if (!is.null(rownames(covariate)) & any(rownames(covariate) != | ||
dimnames(A)[[3]])) | ||
covariate = covariate[dimnames(A)[[3]], ] | ||
} | ||
K = ncol(A) | ||
cell_type = colnames(A) | ||
if (is.null(covariate)) | ||
pval = apply(A, 1, function(x) { | ||
pval = coef(summary(glm(y ~ ., data = data.frame(t(x)), | ||
family = "binomial")))[, 4] | ||
return(get_pval(pval, cell_type, K)) | ||
}) | ||
else pval = apply(A, 1, function(x) { | ||
pval = coef(summary(glm(y ~ ., data = data.frame(t(x), | ||
covariate), family = "binomial")))[, 4] | ||
return(get_pval(pval, cell_type, K)) | ||
}) | ||
qval = pval2qval(pval, A, y, covariate) | ||
return(list(qval = qval, pval = pval)) | ||
} | ||
|
||
DEG_test2 <- function(X_array,y,covariate=NULL){ | ||
dims_ct <- dim(X_array)[3] | ||
cellName <- dimnames(X_array)[[3]] | ||
if(is.null(covariate)){ | ||
pval <- qval <- NULL | ||
for(ct in 1:dims_ct){ | ||
mat <- X_array[,,ct] | ||
p <- NULL | ||
for(i in 1:nrow(mat)){ | ||
p <- c(p, summary(lm(mat[i,]~as.numeric(y)))$coefficients[2,4]) | ||
} | ||
q = p.adjust(p, "fdr") | ||
pval <- cbind(pval,p) | ||
qval <- cbind(qval,q) | ||
} | ||
colnames(qval) <- colnames(pval) <- cellName | ||
rownames(qval) <- rownames(pval) <- dimnames(X_array)[[1]] | ||
|
||
}else{ | ||
pval <- qval <- NULL | ||
cvname <- colnames(covariate) | ||
dims_ct <- dim(X_array)[3] | ||
for(ct in 1:dims_ct){ | ||
mat <- X_array[,,ct] | ||
p <- NULL | ||
for(i in 1:nrow(mat)){ | ||
dat <- cbind(mat[i,],y,covariate) | ||
dat <- as.data.frame(dat) | ||
colnames(dat) <- c("x","y",cvname) | ||
p <- c(p, summary(lm(x~.,dat=dat))$coefficients[2,4]) | ||
} | ||
q = p.adjust(p, "fdr") | ||
pval <- cbind(pval,p) | ||
qval <- cbind(qval,q) | ||
} | ||
colnames(qval) <- colnames(pval) <- cellName | ||
rownames(qval) <- rownames(pval) <- dimnames(X_array)[[1]] | ||
} | ||
|
||
return(list(qval = qval, pval = pval)) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,18 +1,55 @@ | ||
# Hello, world! | ||
# | ||
# This is an example function named 'hello' | ||
# which prints 'Hello, world!'. | ||
# | ||
# You can learn more about package authoring with RStudio at: | ||
# | ||
# http://r-pkgs.had.co.nz/ | ||
# | ||
# Some useful keyboard shortcuts for package authoring: | ||
# | ||
# Install Package: 'Ctrl + Shift + B' | ||
# Check Package: 'Ctrl + Shift + E' | ||
# Test Package: 'Ctrl + Shift + T' | ||
X = t(matrix(c(1:12),nrow=3,ncol=4)) | ||
theta = matrix(c(0.5,0,1,0.5,1,0),nrow=3,ncol=2) | ||
P = array(0, | ||
dim = c( nrow(X), | ||
ncol(X), | ||
ncol(theta)), | ||
dimnames = list( rownames(X), | ||
colnames(X), | ||
colnames(theta)) | ||
) | ||
R <- matrix(c(2,4,4,6,3,2,5,4),nrow=4,ncol=2) | ||
derive_P2 <- function(X, theta, P_old,R,alpha){ | ||
## P_old: a tensor variable with three dimensions | ||
## theta: the cell type proportions variable | ||
## cell_type_index: optimize which type of cells | ||
## R: reference matrix | ||
dP1 <- dP2 <- array(0, | ||
dim = c( nrow(X), | ||
ncol(X), | ||
ncol(theta)), | ||
dimnames = list( rownames(X), | ||
colnames(X), | ||
colnames(theta)) | ||
) | ||
for(cell_type_index in 1:ncol(theta)){ | ||
R.m <- as.matrix(R[,cell_type_index]) | ||
|
||
hello <- function() { | ||
print("Hello, world!") | ||
cell_type_seq <- c(1:ncol(theta)) | ||
cell_type_seq <- cell_type_seq[cell_type_seq!=cell_type_index] | ||
|
||
X_summary = Reduce("+", | ||
lapply(cell_type_seq, function(i) P_old[,,i]%*%diag(theta[,i]) ) | ||
) | ||
X_summary <- X-X_summary | ||
|
||
dP1[,,cell_type_index] <- 2*(P_old[,,cell_type_index]%*%diag(theta[,cell_type_index]) - X_summary)%*%diag(theta[,cell_type_index]) | ||
dP2[,,cell_type_index] <- 2*(as.matrix(rowMeans(P_old[,,cell_type_index]))-R.m)%*%t(as.matrix(rep((1/ncol(dP2[,,cell_type_index])),ncol(dP2[,,cell_type_index])))) | ||
} | ||
print(dP1) | ||
print(dP2) | ||
dP1 = dP1 / sqrt( sum( dP1^2 ) ) * 1e5 | ||
dP2 = dP2 / sqrt( sum( dP2^2 ) ) * 1e5 | ||
|
||
#calculate w1 | ||
#if( crossprod(as.matrix(dP1), as.matrix(dP2)) >= crossprod(as.matrix(dP1)) ) {w1 = 1} | ||
#else if( crossprod(as.matrix(dP1), as.matrix(dP2)) >= crossprod(as.matrix(dP2)) ) {w1 = 0} | ||
#else { | ||
# w1 = crossprod(as.matrix(dP2-dP1), as.matrix(dP2))/sum((dP1-dP2)^2) | ||
#} | ||
w1 <- alpha | ||
w2 <- 1-w1 | ||
|
||
dP <- dP1*as.numeric(w1) + dP2*as.numeric(w2) | ||
return(dP) | ||
} |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.