-
Notifications
You must be signed in to change notification settings - Fork 37
/
zgtools.R
96 lines (75 loc) · 2.59 KB
/
zgtools.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# fix the dependent problem
fixit <- function(){
# set the language to english
env <- Sys.getenv("LANGUAGE")
on.exit(Sys.setenv("LANGUAGE" = env))
Sys.setenv("LANGUAGE" = "en")
# get the history
file1 <- tempfile("Rrawhist")
savehistory(file1)
rawhist <- readLines(file1)
unlink(file1)
# get the package name
rawhist <- rawhist[grepl("^library|^require", rawhist)]
rawhist <- rawhist[length(rawhist)]
package.name <- strsplit(rawhist, "\\(|\\)")[[1]][2]
package.name <- gsub("'|\"","", package.name)
# get the dependent package
text <- try(library(package.name, character.only = TRUE))
# extract the package name from error message
package.name <- gsub(".*called ‘(.*?)’.*", "\\1", text[1])
# download package
if (!requireNamespace("BiocManager", quietly = TRUE)){
install.packages("BiocManager")
}
BiocManager::install(package.name)
}
# read the STAR count result and build a matrix
read.STAR <- function(dir){
files <- list.files(dir, pattern = "*_ReadsPerGene.out.tab", full.names = TRUE)
file <- names <- gsub("_ReadsPerGene.out.tab","",basename(files))
file <- names <- gsub("-"," <- ", file <- names)
df.list <- lapply(files, function(x) {
data.table::fread(x, skip=4)[, c("V1", "V2")]
}
)
df <- do.call(cbind,lapply(df.list, function(x){
x[,2]
}
))
mt <- as.matrix(df)
row.names(mt) <- df.list[[1]]$V1
colnames(mt) <- file <- names
mt
}
# Plot PCA without building a DESeq2 object
# Plot PCA without building a DESeq2 object
plotPCA <- function(mt, ntop = 500, group = NULL, blind = FALSE,
label = TRUE) {
if (is.data.frame(mt)){
mt <- as.matrix.data.frame(mt)
}
vsd <- DESeq2::vst(mt, blind = blind)
rv <- matrixStats::rowVars( vsd )
select <- order(rv, decreasing = TRUE)[seq_len(min(ntop, length(rv)))]
pca <- prcomp(t(vsd[select, ]))
percentVar <- pca$sdev^2/sum(pca$sdev^2)
d <- data.frame(PC1 = pca$x[, 1], PC2 = pca$x[, 2])
d$sample <- row.names(d)
if (is.null(group)){
p <- ggplot(data = d, aes_string(x = "PC1", y = "PC2")) +
geom_point(size = 3)
} else{
d$group <- group
p <- ggplot(data = d, aes_string(x = "PC1", y = "PC2")) +
geom_point(aes_string(color="group"), size = 3)
}
if (label){
p <- p + ggrepel::geom_text_repel(aes_string(label="sample"))
}
p <- p +
xlab(paste0("PC1: ", round(percentVar[1] * 100), "% variance")) +
ylab(paste0("PC2: ", round(percentVar[2] * 100), "% variance")) +
coord_fixed()
return(p)
}