Add files via upload

WWXkenmo · Mar 15, 2022 · 2afe55e · 2afe55e
1 parent 5e9c827
commit 2afe55e
Show file tree

Hide file tree

Showing 5 changed files with 301 additions and 17 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: ENIGMA
 Type: Package
 Title: dEcoNvolutIon method based on reGularized MAtrix completion
-Version: 0.1.1
+Version: 0.1.5
 Author: Weixu Wang, Jun Yao
 Maintainer: Weixu Wang <[email protected]>
 Description: Improved estimation of cell type-specific gene expression through

diff --git a/NAMESPACE b/NAMESPACE
@@ -2,6 +2,7 @@
 
 export(ENIGMA_L2_max_norm)
 export(ENIGMA_trace_norm)
+export(FindCSE_DEG)
 export(batch_correct)
 export(cell_deconvolve_trace)
 export(clean_model)

diff --git a/R/FindCSE_DEG.R b/R/FindCSE_DEG.R
@@ -0,0 +1,209 @@
+#' @title Perform CTS-DEG analysis
+#'
+#' @description Analysis the differential expression genes for each cell type
+#' 
+#' @param object ENIGMA object
+#' @param FDR_control
+#' if use Wang et al., FDR controlled DEG analysis model. Default: TRUE
+#'
+#' @param FoldChange
+#' if output the FoldChange of each gene, if FALSE, then output the expression difference
+#'
+#' @param covariate
+#' The data.frame object contains the covariate information of each sample
+#'
+#' @return A list object contain the DEG results of each cell types
+#'
+#'
+#' @examples
+#' \dontrun{
+#' DEG = FindCSE_DEG(object,y)
+#' DEG = FindCSE_DEG(object,y,covariate=covariate)
+#' head(DEG$celltype1)
+#' }
+#'
+#' @reference
+#' Wang J, Roeder K, Devlin B. Bayesian estimation of cell type–specific gene expression with prior derived from single-cell data[J]. Genome research, 2021, 31(10): 1807-1818.
+#'
+#' @export
+FindCSE_DEG <- function(object,y,FDR_control = TRUE,covariate=NULL,FoldChange = FALSE){
+    ####
+    #identify the DEG of ENIGMA outputs require normalized profile
+    if(is.null(object@result_CSE_normalized)){
+        stop('CTS-DEG analysis required normalized profile!')
+    }
+    #convert sce object into 3-order array
+    Exp = sce2array(object)
+    cellName = dimnames(Exp)[[3]]
+    DEG_list = list()
+    if(FDR_control){result <- DEG_test(Exp,y,covariate)}else{result <- DEG_test2(Exp,y,covariate)}
+    ES_m <- EffectiveSize(Exp,y,FoldChange)
+    for(i in cellName){
+        Tab_m <- cbind(ES_m[,i],result$pval[,i],result$qval[,i])
+        if(FoldChange){colnames(Tab_m) <- c("FoldChange","pvalue","qvalue")}else{
+            colnames(Tab_m) <- c("ExpressionDifference","pvalue","qvalue")
+        }
+        DEG_list[[i]] <- Tab_m
+    }
+    DEG_list
+}
+
+EffectiveSize <- function(X_array,y,FoldChange=FoldChange){
+    if(FoldChange){
+        ### convert all profile into pseudo positive
+        X_array[X_array<0] <- 0
+        ES_m <- NULL
+        nCell = dim(X_array)[3]
+        cellName = dimnames(X_array)[[3]]
+        for(i in 1:nCell){
+            G = X_array[,,i]
+            FC <- apply(G,1,FCcall,y=y)
+            ES_m = cbind(ES_m,FC)
+        }
+        colnames(ES_m) <- cellName
+    }else{
+        ES_m <- NULL
+        nCell = dim(X_array)[3]
+        cellName = dimnames(X_array)[[3]]
+        for(i in 1:nCell){
+            G = X_array[,,i]
+            FC <- apply(G,1,DEcall,y=y)
+            ES_m = cbind(ES_m,FC)
+        }
+        colnames(ES_m) <- cellName
+    }
+    ES_m
+}
+
+FCcall <- function(g,y){
+    mean(g[y==1])/mean(g[y==0])
+}
+
+DEcall <- function(g,y){
+    mean(g[y==1]) - mean(g[y==0])
+}
+
+
+DEG_test <- function(X_array,y,covariate=NULL){
+    O = array(0,
+                  dim = c( dim(X_array)[1],
+                           dim(X_array)[3],
+                           dim(X_array)[2]),
+                  dimnames = list( dimnames(X_array)[[1]],
+                                   dimnames(X_array)[[3]],
+                                   dimnames(X_array)[[2]])
+    )
+	for(i in 1:dim(X_array)[3]){
+	  O[,i,] <- X_array[,,i]
+	}
+	###Using ANOVA+glm model to evaluate prediction performance
+	result <- test(O,y,covariate)
+    pval <- t(result$pval)
+    qval <- t(result$qval)
+    colnames(qval) <- colnames(pval) <- dimnames(X_array)[[3]]
+    rownames(qval) <- rownames(pval) <- dimnames(X_array)[[1]]
+	return(list(qval = qval, pval = pval))
+}
+
+
+get_pval <- function (pval, cell_type, K)
+{
+    pval0 = rep(NA, K)
+    names(pval0) = cell_type
+    names = intersect(names(pval), cell_type)
+    pval0[names] = pval[names]
+    return(pval0)
+}
+
+pval2qval <- function (pval, A, y, covariate = NULL)
+{
+    ng = nrow(A)
+    if (is.null(covariate))
+        pval1 = sapply(1:ng, function(g) try(summary(manova(t(A[g,
+            , ]) ~ y))$stats[1, "Pr(>F)"], silent = T))
+    else pval1 = sapply(1:ng, function(g) try(summary(manova(t(A[g,
+        , ]) ~ y + covariate))$stats[1, "Pr(>F)"], silent = T))
+    pval = pval[, !is.na(as.numeric(pval1))]
+    pval1 = na.omit(as.numeric(pval1))
+    qval1 = p.adjust(pval1, "fdr")
+    qval = pval
+    K = ncol(A)
+    for (i in 1:ncol(pval)) {
+        qval[, i] = 1
+        if (min(pval[, i], na.rm = T) < 0.05/K)
+            qval[, i][which.min(pval[, i])] = qval1[i]
+    }
+    return(qval)
+}
+
+
+test <- function (A, y, covariate = NULL)
+{
+    if (dim(A)[3] != length(y))
+        print("CSE estimates and y have different length")
+    if (!is.null(covariate))
+        if (dim(A)[3] != nrow(covariate))
+            print("CSE estimates and covariate have different number of samples/subjects")
+        else {
+            if (!is.null(rownames(covariate)) & any(rownames(covariate) !=
+                dimnames(A)[[3]]))
+                covariate = covariate[dimnames(A)[[3]], ]
+        }
+    K = ncol(A)
+    cell_type = colnames(A)
+    if (is.null(covariate))
+        pval = apply(A, 1, function(x) {
+            pval = coef(summary(glm(y ~ ., data = data.frame(t(x)),
+                family = "binomial")))[, 4]
+            return(get_pval(pval, cell_type, K))
+        })
+    else pval = apply(A, 1, function(x) {
+        pval = coef(summary(glm(y ~ ., data = data.frame(t(x),
+            covariate), family = "binomial")))[, 4]
+        return(get_pval(pval, cell_type, K))
+    })
+    qval = pval2qval(pval, A, y, covariate)
+    return(list(qval = qval, pval = pval))
+}
+
+DEG_test2 <- function(X_array,y,covariate=NULL){
+    dims_ct <- dim(X_array)[3]
+    cellName <- dimnames(X_array)[[3]]
+    if(is.null(covariate)){
+    pval <- qval <- NULL
+    for(ct in 1:dims_ct){
+	mat <- X_array[,,ct]
+	p <- NULL
+	for(i in 1:nrow(mat)){
+	 p <- c(p, summary(lm(mat[i,]~as.numeric(y)))$coefficients[2,4])
+	}
+	q = p.adjust(p, "fdr")
+    pval <- cbind(pval,p)
+    qval <- cbind(qval,q)
+	}
+    colnames(qval) <- colnames(pval) <- cellName
+    rownames(qval) <- rownames(pval) <- dimnames(X_array)[[1]]
+
+    }else{
+        pval <- qval <- NULL
+        cvname <- colnames(covariate)
+        dims_ct <- dim(X_array)[3]
+            for(ct in 1:dims_ct){
+            mat <- X_array[,,ct]
+            p <- NULL
+            for(i in 1:nrow(mat)){
+                dat <- cbind(mat[i,],y,covariate)
+                dat <- as.data.frame(dat)
+                colnames(dat) <- c("x","y",cvname)
+                p <- c(p, summary(lm(x~.,dat=dat))$coefficients[2,4])
+            }
+            q = p.adjust(p, "fdr")
+            pval <- cbind(pval,p)
+            qval <- cbind(qval,q)
+	    }
+    colnames(qval) <- colnames(pval) <- cellName
+    rownames(qval) <- rownames(pval) <- dimnames(X_array)[[1]]    
+    }
+
+    return(list(qval = qval, pval = pval))
+}
diff --git a/R/hello.R b/R/hello.R
@@ -1,18 +1,55 @@
-# Hello, world!
-#
-# This is an example function named 'hello' 
-# which prints 'Hello, world!'.
-#
-# You can learn more about package authoring with RStudio at:
-#
-#   http://r-pkgs.had.co.nz/
-#
-# Some useful keyboard shortcuts for package authoring:
-#
-#   Install Package:           'Ctrl + Shift + B'
-#   Check Package:             'Ctrl + Shift + E'
-#   Test Package:              'Ctrl + Shift + T'
+X = t(matrix(c(1:12),nrow=3,ncol=4))
+theta = matrix(c(0.5,0,1,0.5,1,0),nrow=3,ncol=2)
+P = array(0,
+              dim = c( nrow(X),
+                       ncol(X),
+                       ncol(theta)),
+              dimnames = list( rownames(X),
+                               colnames(X),
+                               colnames(theta))
+)
+R <- matrix(c(2,4,4,6,3,2,5,4),nrow=4,ncol=2)
+derive_P2 <- function(X, theta, P_old,R,alpha){
+  ## P_old: a tensor variable with three dimensions
+  ## theta: the cell type proportions variable
+  ## cell_type_index: optimize which type of cells
+  ## R: reference matrix
+  dP1 <- dP2 <- array(0,
+                      dim = c( nrow(X),
+                               ncol(X),
+                               ncol(theta)),
+                      dimnames = list( rownames(X),
+                                       colnames(X),
+                                       colnames(theta))
+  )
+  for(cell_type_index in 1:ncol(theta)){
+    R.m <- as.matrix(R[,cell_type_index])
 
-hello <- function() {
-  print("Hello, world!")
+    cell_type_seq <- c(1:ncol(theta))
+    cell_type_seq <- cell_type_seq[cell_type_seq!=cell_type_index]
+
+    X_summary = Reduce("+",
+                       lapply(cell_type_seq, function(i) P_old[,,i]%*%diag(theta[,i]) )
+    )
+    X_summary <- X-X_summary
+
+    dP1[,,cell_type_index] <- 2*(P_old[,,cell_type_index]%*%diag(theta[,cell_type_index]) - X_summary)%*%diag(theta[,cell_type_index])
+    dP2[,,cell_type_index] <- 2*(as.matrix(rowMeans(P_old[,,cell_type_index]))-R.m)%*%t(as.matrix(rep((1/ncol(dP2[,,cell_type_index])),ncol(dP2[,,cell_type_index]))))
+  }
+  print(dP1)
+  print(dP2)
+  dP1 = dP1 / sqrt( sum( dP1^2 ) ) * 1e5
+  dP2 = dP2 / sqrt( sum( dP2^2 ) ) * 1e5
+
+  #calculate w1
+  #if( crossprod(as.matrix(dP1), as.matrix(dP2)) >= crossprod(as.matrix(dP1)) ) {w1 = 1}
+  #else if( crossprod(as.matrix(dP1), as.matrix(dP2)) >= crossprod(as.matrix(dP2)) ) {w1 = 0}
+  #else {
+  #    w1 = crossprod(as.matrix(dP2-dP1), as.matrix(dP2))/sum((dP1-dP2)^2)
+  #}
+  w1 <- alpha
+  w2 <- 1-w1
+
+  dP <- dP1*as.numeric(w1) + dP2*as.numeric(w2)
+  return(dP)
 }
diff --git a/man/FindCSE_DEG.Rd b/man/FindCSE_DEG.Rd