-
Notifications
You must be signed in to change notification settings - Fork 1
/
data_normalization_ScatterPlot.Rmd
84 lines (77 loc) · 2.19 KB
/
data_normalization_ScatterPlot.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
---
title: "Data_normalization_FPKM_Scatter Plot"
author: "Chengqi(Charley) Wang"
date: "2/3/2020"
output: html_document
---
<br/><br/>
#### load the data
```{r}
# Load the feature-count table used by every later chunk.
# The file name below is relative, so it is resolved against the directory
# set here.
# NOTE(review): this path is machine-specific, and knitr restores the working
# directory after a chunk that calls setwd() -- confirm that files written by
# later chunks end up where they are expected.
setwd("~/Documents/work/paper_write/genome_training/R_training/")
# Tab-separated file with a header row; the first column supplies the row
# names of `df` instead of being kept as a data column.
df <- read.table(
  "vehicle_drug_feature_counts.txt",
  header = TRUE, sep = "\t", row.names = 1
)
```
<br/><br/>
#### data normalization (FPKM)
```{r}
# Keep only columns 6 to 9 of `df`, which hold the read counts that get
# normalized below (named c1, c2, t1, t2 afterwards).
dfReads <- df[, 6:9]
# FPKM for one column: count / Length * 10^9 / (sum of that column's counts).
# apply() with MARGIN = 2 hands the function one column at a time, so `x` is
# a single sample and `df$Length` lines up with it row by row.
# The data frame is converted with as.matrix() explicitly instead of relying
# on the implicit coercion apply() performs; the result is a numeric matrix
# with one row per feature and one column per sample.
fpkm <- apply(as.matrix(dfReads), 2, function(x) {
  x / df$Length * 10^9 / sum(x)
})
## change the col name
colnames(fpkm) <- c("c1", "c2", "t1", "t2")
## save the data
# Tab-separated output with row and column names, strings left unquoted.
write.table(fpkm,
  file = "vehicle_drug_feature_counts.fpkm.txt",
  row.names = TRUE, col.names = TRUE, sep = "\t",
  quote = FALSE
)
```
<br/><br/>
#### compare the expression by scatter plot (biological replicates)
```{r}
# Step 1: first two columns of `fpkm` against each other, untransformed.
plot(fpkm[, 1], fpkm[, 2])

# Step 2: the same pair on a log2 scale.
plot(log2(fpkm[, 1]), log2(fpkm[, 2]))

# Step 3: colour the points -- solid red symbols (pch 19) ...
plot(log2(fpkm[, 1]), log2(fpkm[, 2]),
  pch = 19, col = "red"
)
# ... or outlined symbols with a red fill (pch 21 uses `bg` for the fill).
plot(log2(fpkm[, 1]), log2(fpkm[, 2]),
  pch = 21, bg = "red"
)

# Step 4: replace the default axis labels.
plot(log2(fpkm[, 1]), log2(fpkm[, 2]),
  pch = 21, bg = "red",
  xlab = "log2(control1_FPKM)",
  ylab = "log2(control2_FPKM)"
)

# Step 5: las = 1 prints the axis tick labels horizontally.
plot(log2(fpkm[, 1]), log2(fpkm[, 2]),
  pch = 21, bg = "red", las = 1,
  xlab = "log2(control1_FPKM)",
  ylab = "log2(control2_FPKM)"
)

# Step 6: regression line. Only rows where both log2 values are finite are
# used for the fit.
id <- which(is.finite(log2(fpkm[, 1])) & is.finite(log2(fpkm[, 2])))
# Show how many rows exist and how many of them enter the fit.
nrow(fpkm)
length(id)
replicate_fit <- lm(log2(fpkm[id, 2]) ~ log2(fpkm[id, 1]))
abline(replicate_fit)

# Step 7: draw the same line again, thicker and in black.
abline(replicate_fit, lwd = 3, col = "black")
```
<br/><br/>
#### use ggplot2 to draw scatter plot
```{r}
library(ggplot2)
# ggplot() works on data frames, so copy the first two columns of `fpkm`
# into one; as.numeric() keeps only the values (row names are dropped).
fpkm_c <- data.frame(
  c1 = as.numeric(fpkm[, 1]),
  c2 = as.numeric(fpkm[, 2])
)
# Scatter plot of c1 against c2 with a fitted straight line (method = lm);
# se = FALSE switches off the confidence band around the line.
ggplot(fpkm_c, aes(x = c1, y = c2)) +
  geom_point() +
  geom_smooth(method = lm, se = FALSE)
## with other color
ggplot(fpkm_c, aes(x = c1, y = c2)) +
  geom_point(col = "red") +
  geom_smooth(method = lm, se = FALSE, col = "navy")
```
<br/><br/>
#### use ggplot2 to draw scatter plot with 2d density estimation
```{r}
# Scatter plot of the log2-transformed c1/c2 values with 2d density
# contour lines drawn over the points.
ggplot(data = log2(fpkm_c), mapping = aes(x = c1, y = c2)) +
  geom_point() +
  geom_density_2d()
```