-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathHW1.R
27 lines (22 loc) · 1.06 KB
/
HW1.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# Goal of this visualization: make salient the chance that a gene picked at
# random is expressed in many cells.

library(ggplot2)

# Load MERFISH data ----
data_path <- "~/Dropbox/JHU/Courses/genomic-data-visualization/data/MERFISH_Slice2Replicate2_halfcortex.csv.gz"
data <- read.csv(data_path)

# Keep only the gene expression columns (column 4 onwards), labelling each
# row with the value from column 1.
# NOTE(review): columns 2-3 are presumably the spatial positions being
# dropped here -- confirm against the CSV header.
gexp <- data[, 4:ncol(data)]
rownames(gexp) <- data[, 1]

# For each gene (column), count how many cells (rows) have non-zero expression.
cellexp <- colSums(gexp != 0)

# Histogram of gene prevalence ----
# To make the visualization more salient:
#   * x-axis is log10(cell count), so each bin spans one order of magnitude;
#   * y-axis is the proportion of genes rather than the raw count.
# NOTE: a gene expressed in zero cells gives log10(0) = -Inf; ggplot2 drops
# such non-finite values with a warning, so the proportions are taken over
# the genes that remain.
df <- data.frame(cellexp = cellexp)

prevalence_plot <- ggplot(data = df, mapping = aes(x = log10(cellexp))) +
  geom_histogram(
    # after_stat() replaces the deprecated stat() for computed variables.
    mapping = aes(y = after_stat(count / sum(count))),
    # Only binwidth is given: it overrides `bins`, so also passing `bins`
    # had no effect. boundary = 0 puts bin edges on whole powers of ten;
    # closed = "right" makes the bins right-closed intervals.
    binwidth = 1,
    boundary = 0,
    closed = "right",
    color = "black",
    fill = "white"
  ) +
  labs(
    title = "Prevalence of Genes by Orders of Magnitude",
    x = "log10(# of cells expressing a gene)",
    y = "proportion of genes"
  )

# Display the plot (auto-printed at top level, as before).
prevalence_plot

# Save as PNG, passing the plot explicitly instead of relying on last_plot().
ggsave("gene_prevalence.png", plot = prevalence_plot)