Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
WWXkenmo authored Mar 15, 2022
1 parent 5e9c827 commit 2afe55e
Show file tree
Hide file tree
Showing 5 changed files with 301 additions and 17 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: ENIGMA
Type: Package
Title: dEcoNvolutIon method based on reGularized MAtrix completion
Version: 0.1.1
Version: 0.1.5
Author: Weixu Wang, Jun Yao
Maintainer: Weixu Wang <[email protected]>
Description: Improved estimation of cell type-specific gene expression through
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

export(ENIGMA_L2_max_norm)
export(ENIGMA_trace_norm)
export(FindCSE_DEG)
export(batch_correct)
export(cell_deconvolve_trace)
export(clean_model)
Expand Down
209 changes: 209 additions & 0 deletions R/FindCSE_DEG.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
#' @title Perform CTS-DEG analysis
#'
#' @description Analysis the differential expression genes for each cell type
#'
#' @param object ENIGMA object
#' @param FDR_control
#' if use Wang et al., FDR controlled DEG analysis model. Default: TRUE
#'
#' @param FoldChange
#' if output the FoldChange of each gene, if FALSE, then output the expression difference
#'
#' @param covariate
#' The data.frame object contains the covariate information of each sample
#'
#' @return A list object contain the DEG results of each cell types
#'
#'
#' @examples
#' \dontrun{
#' DEG = FindCSE_DEG(object,y)
#' DEG = FindCSE_DEG(object,y,covariate=covariate)
#' head(DEG$celltype1)
#' }
#'
#' @reference
#' Wang J, Roeder K, Devlin B. Bayesian estimation of cell type–specific gene expression with prior derived from single-cell data[J]. Genome research, 2021, 31(10): 1807-1818.
#'
#' @export
FindCSE_DEG <- function(object,y,FDR_control = TRUE,covariate=NULL,FoldChange = FALSE){
####
#identify the DEG of ENIGMA outputs require normalized profile
if(is.null(object@result_CSE_normalized)){
stop('CTS-DEG analysis required normalized profile!')
}
#convert sce object into 3-order array
Exp = sce2array(object)
cellName = dimnames(Exp)[[3]]
DEG_list = list()
if(FDR_control){result <- DEG_test(Exp,y,covariate)}else{result <- DEG_test2(Exp,y,covariate)}
ES_m <- EffectiveSize(Exp,y,FoldChange)
for(i in cellName){
Tab_m <- cbind(ES_m[,i],result$pval[,i],result$qval[,i])
if(FoldChange){colnames(Tab_m) <- c("FoldChange","pvalue","qvalue")}else{
colnames(Tab_m) <- c("ExpressionDifference","pvalue","qvalue")
}
DEG_list[[i]] <- Tab_m
}
DEG_list
}

EffectiveSize <- function(X_array,y,FoldChange=FoldChange){
if(FoldChange){
### convert all profile into pseudo positive
X_array[X_array<0] <- 0
ES_m <- NULL
nCell = dim(X_array)[3]
cellName = dimnames(X_array)[[3]]
for(i in 1:nCell){
G = X_array[,,i]
FC <- apply(G,1,FCcall,y=y)
ES_m = cbind(ES_m,FC)
}
colnames(ES_m) <- cellName
}else{
ES_m <- NULL
nCell = dim(X_array)[3]
cellName = dimnames(X_array)[[3]]
for(i in 1:nCell){
G = X_array[,,i]
FC <- apply(G,1,DEcall,y=y)
ES_m = cbind(ES_m,FC)
}
colnames(ES_m) <- cellName
}
ES_m
}

FCcall <- function(g,y){
mean(g[y==1])/mean(g[y==0])
}

DEcall <- function(g,y){
mean(g[y==1]) - mean(g[y==0])
}


DEG_test <- function(X_array,y,covariate=NULL){
O = array(0,
dim = c( dim(X_array)[1],
dim(X_array)[3],
dim(X_array)[2]),
dimnames = list( dimnames(X_array)[[1]],
dimnames(X_array)[[3]],
dimnames(X_array)[[2]])
)
for(i in 1:dim(X_array)[3]){
O[,i,] <- X_array[,,i]
}
###Using ANOVA+glm model to evaluate prediction performance
result <- test(O,y,covariate)
pval <- t(result$pval)
qval <- t(result$qval)
colnames(qval) <- colnames(pval) <- dimnames(X_array)[[3]]
rownames(qval) <- rownames(pval) <- dimnames(X_array)[[1]]
return(list(qval = qval, pval = pval))
}


get_pval <- function (pval, cell_type, K)
{
pval0 = rep(NA, K)
names(pval0) = cell_type
names = intersect(names(pval), cell_type)
pval0[names] = pval[names]
return(pval0)
}

pval2qval <- function (pval, A, y, covariate = NULL)
{
ng = nrow(A)
if (is.null(covariate))
pval1 = sapply(1:ng, function(g) try(summary(manova(t(A[g,
, ]) ~ y))$stats[1, "Pr(>F)"], silent = T))
else pval1 = sapply(1:ng, function(g) try(summary(manova(t(A[g,
, ]) ~ y + covariate))$stats[1, "Pr(>F)"], silent = T))
pval = pval[, !is.na(as.numeric(pval1))]
pval1 = na.omit(as.numeric(pval1))
qval1 = p.adjust(pval1, "fdr")
qval = pval
K = ncol(A)
for (i in 1:ncol(pval)) {
qval[, i] = 1
if (min(pval[, i], na.rm = T) < 0.05/K)
qval[, i][which.min(pval[, i])] = qval1[i]
}
return(qval)
}


test <- function (A, y, covariate = NULL)
{
if (dim(A)[3] != length(y))
print("CSE estimates and y have different length")
if (!is.null(covariate))
if (dim(A)[3] != nrow(covariate))
print("CSE estimates and covariate have different number of samples/subjects")
else {
if (!is.null(rownames(covariate)) & any(rownames(covariate) !=
dimnames(A)[[3]]))
covariate = covariate[dimnames(A)[[3]], ]
}
K = ncol(A)
cell_type = colnames(A)
if (is.null(covariate))
pval = apply(A, 1, function(x) {
pval = coef(summary(glm(y ~ ., data = data.frame(t(x)),
family = "binomial")))[, 4]
return(get_pval(pval, cell_type, K))
})
else pval = apply(A, 1, function(x) {
pval = coef(summary(glm(y ~ ., data = data.frame(t(x),
covariate), family = "binomial")))[, 4]
return(get_pval(pval, cell_type, K))
})
qval = pval2qval(pval, A, y, covariate)
return(list(qval = qval, pval = pval))
}

DEG_test2 <- function(X_array,y,covariate=NULL){
dims_ct <- dim(X_array)[3]
cellName <- dimnames(X_array)[[3]]
if(is.null(covariate)){
pval <- qval <- NULL
for(ct in 1:dims_ct){
mat <- X_array[,,ct]
p <- NULL
for(i in 1:nrow(mat)){
p <- c(p, summary(lm(mat[i,]~as.numeric(y)))$coefficients[2,4])
}
q = p.adjust(p, "fdr")
pval <- cbind(pval,p)
qval <- cbind(qval,q)
}
colnames(qval) <- colnames(pval) <- cellName
rownames(qval) <- rownames(pval) <- dimnames(X_array)[[1]]

}else{
pval <- qval <- NULL
cvname <- colnames(covariate)
dims_ct <- dim(X_array)[3]
for(ct in 1:dims_ct){
mat <- X_array[,,ct]
p <- NULL
for(i in 1:nrow(mat)){
dat <- cbind(mat[i,],y,covariate)
dat <- as.data.frame(dat)
colnames(dat) <- c("x","y",cvname)
p <- c(p, summary(lm(x~.,dat=dat))$coefficients[2,4])
}
q = p.adjust(p, "fdr")
pval <- cbind(pval,p)
qval <- cbind(qval,q)
}
colnames(qval) <- colnames(pval) <- cellName
rownames(qval) <- rownames(pval) <- dimnames(X_array)[[1]]
}

return(list(qval = qval, pval = pval))
}
69 changes: 53 additions & 16 deletions R/hello.R
Original file line number Diff line number Diff line change
@@ -1,18 +1,55 @@
# Hello, world!
#
# This is an example function named 'hello'
# which prints 'Hello, world!'.
#
# You can learn more about package authoring with RStudio at:
#
# http://r-pkgs.had.co.nz/
#
# Some useful keyboard shortcuts for package authoring:
#
# Install Package: 'Ctrl + Shift + B'
# Check Package: 'Ctrl + Shift + E'
# Test Package: 'Ctrl + Shift + T'
X = t(matrix(c(1:12),nrow=3,ncol=4))
theta = matrix(c(0.5,0,1,0.5,1,0),nrow=3,ncol=2)
P = array(0,
dim = c( nrow(X),
ncol(X),
ncol(theta)),
dimnames = list( rownames(X),
colnames(X),
colnames(theta))
)
R <- matrix(c(2,4,4,6,3,2,5,4),nrow=4,ncol=2)
derive_P2 <- function(X, theta, P_old,R,alpha){
## P_old: a tensor variable with three dimensions
## theta: the cell type proportions variable
## cell_type_index: optimize which type of cells
## R: reference matrix
dP1 <- dP2 <- array(0,
dim = c( nrow(X),
ncol(X),
ncol(theta)),
dimnames = list( rownames(X),
colnames(X),
colnames(theta))
)
for(cell_type_index in 1:ncol(theta)){
R.m <- as.matrix(R[,cell_type_index])

hello <- function() {
print("Hello, world!")
cell_type_seq <- c(1:ncol(theta))
cell_type_seq <- cell_type_seq[cell_type_seq!=cell_type_index]

X_summary = Reduce("+",
lapply(cell_type_seq, function(i) P_old[,,i]%*%diag(theta[,i]) )
)
X_summary <- X-X_summary

dP1[,,cell_type_index] <- 2*(P_old[,,cell_type_index]%*%diag(theta[,cell_type_index]) - X_summary)%*%diag(theta[,cell_type_index])
dP2[,,cell_type_index] <- 2*(as.matrix(rowMeans(P_old[,,cell_type_index]))-R.m)%*%t(as.matrix(rep((1/ncol(dP2[,,cell_type_index])),ncol(dP2[,,cell_type_index]))))
}
print(dP1)
print(dP2)
dP1 = dP1 / sqrt( sum( dP1^2 ) ) * 1e5
dP2 = dP2 / sqrt( sum( dP2^2 ) ) * 1e5

#calculate w1
#if( crossprod(as.matrix(dP1), as.matrix(dP2)) >= crossprod(as.matrix(dP1)) ) {w1 = 1}
#else if( crossprod(as.matrix(dP1), as.matrix(dP2)) >= crossprod(as.matrix(dP2)) ) {w1 = 0}
#else {
# w1 = crossprod(as.matrix(dP2-dP1), as.matrix(dP2))/sum((dP1-dP2)^2)
#}
w1 <- alpha
w2 <- 1-w1

dP <- dP1*as.numeric(w1) + dP2*as.numeric(w2)
return(dP)
}
37 changes: 37 additions & 0 deletions man/FindCSE_DEG.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 2afe55e

Please sign in to comment.