You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
library(NMF)
library(rsvd)
library(Rtsne)
library(ggplot2)
library(cowplot)
library(sva)
library(igraph)
library(cccd)
### Load all the required functions for this analysis
source("Fxns.R")
Download data and identify variable genes
Load UMI count data from GEO
## Downloading UMI count data
download.file("ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE92nnn/GSE92332/suppl/GSE92332_atlas_UMIcounts.txt.gz", destfile="GSE92332_atlas_UMIcounts.txt.gz")
## Reading UMI count data from fileatlas_umis= read.delim("GSE92332_atlas_UMIcounts.txt.gz")
info(sprintf("Data dimensions: %s" , paste(dim(atlas_umis), collapse="x")))
## 2017-11-17 11:49:52 INFO: Data dimensions: 15971x7216
Get variable genes
v= get.variable.genes(atlas_umis, min.cv2=100)
## 2017-11-17 11:49:57 INFO: Fitting only the 9723 genes with mean expression > 0.0330515521064302
## 2017-11-17 11:49:58 INFO: Found 3542 variable genes (p<0.05)
## take mean tpm across batches to show batch effectbatch_mean_tpm= group.means(counts=atlas_tpm, groups=batch.labels)
x=batch_mean_tpm[, 1]
y=batch_mean_tpm[,2]
expr.cor= round(cor(x,y),2)
smoothScatter(x, y, nrpoints=Inf, pch=16, cex=0.25, main=sprintf("Before batch correction, correlation between \ntwo illustrative batches is %s", expr.cor), xlab="All genes Batch 2, mean log2(TPM+1)", ylab="All genes Batch 1, mean log2(TPM+1)")
Compensate for batch effect using ComBat
# Takes a few minutesatlas_tpm_norm= batch.normalise.comBat(counts=atlas_tpm, batch.groups=batch.labels)
## Found 10 batches
## Adjusting for 0 covariate(s) or covariate level(s)
## Standardizing Data across genes
## Fitting L/S model and finding priors
## Finding parametric adjustments
## Adjusting the Data
batch_mean_tpm_norm= group.means(counts=atlas_tpm_norm, groups=batch.labels)
x=batch_mean_tpm_norm[, 1]
y=batch_mean_tpm_norm[,2]
expr.cor= round(cor(x,y),2)
smoothScatter(x, y, nrpoints=Inf, pch=16, cex=0.25, main=sprintf("After batch correction, correlation between \ntwo illustrative batches is %s", expr.cor), xlab="All genes Batch 2, mean log2(TPM+1)", ylab="All genes Batch 1, mean log2(TPM+1)")
Dimensionality reduction
Run (randomized) PCA, t-SNE
pca= rpca(t(atlas_tpm_norm[var.genes,]), center=T, scale=T, retx=T, k=100)$x# or read PCA rotations used in the paper (since the alg is randomized)#pca = read.delim(file="atlas_pca_scores.txt")### run t-SNE#barnes_hut_tsne = Rtsne(pca[, 1:13], check_duplicates=T, pca=FALSE, #dont run PCA again# initial_dims = 13, perplexity = 20, max_iter = 100000, verbose=T, whiten=F)#tsne.rot = barnes_hut_tsne$Y# or read t-SNE rotations used in the paper (since the alg is randomized)tsne.rot= read.delim("atlas_tsne.txt")
Test for significant PCs. To avoid very long runtimes, run on a high memory server with lots of cores (n.cores).
y = sig.pcs.perm(dat=t(atlas_umis[var.genes,]), center=T, scale=T, max.pc=100, B=1000, n.cores=20, randomized=T)
PC permutation test completed.
13 PCS significant (p<0.05, 1000 bootstraps)
Runtime: 5110 s
Unsupervised clustering
Run kNN-graph clustering
# build cell-cell euclidean distance matrix using significant PC scoresdm= as.matrix(dist(pca[, 1:13]))
# build nearest neighbor graphknn= build_knn_graph(dm, k=200)
clustering= cluster_graph(knn)$partition# merge a spurious cluster (cluster 16 is only a single cell) into the most similar clusterclustering= merge_clusters(clustering, c(8, 16))
## Merging cluster 16 into 8 ..
## confirm that clusters are extremely similar to those in the paper (infomap is a random-walk based alg, so there may begetwd minor differences)clusters_from_paper=factor(unlist(lapply(colnames(atlas_umis), get_field, 3,"_")))
overlap= as.data.frame.matrix(table(clusters_from_paper, clustering))
overlap= round(sweep(overlap,2,colSums(overlap),`/`),2)
overlap=overlap[,apply(overlap, 1, FUN=which.max)]
aheatmap(overlap, color=cubehelix1.16, border_color=list("cell"="white"), txt=overlap, Colv=NA, Rowv=NA)
Visualize the clustering overlaid onto the t-SNE (Figure 1b)