-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathROH_final.R
84 lines (58 loc) · 2.58 KB
/
ROH_final.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
source("/home/antortjim/MEGA/Master/PG/project/scripts/read_data.R")
library("xtable")
df <- ROH_plotter(ROH, bim_ROH, unit = 1e6)
clean_df <- df %>% filter(len > 0)
df2 <- clean_df %>% group_by(subspecies, len) %>% dplyr::summarise(v = n()) %>% as.data.frame
df3 <- data.frame()
for(i in 1:nrow(df2)) {
for(j in 1:df2[i,]$v) {
df3 <- rbind(df3, df2[i, 1:2])
}
}
# ggplot(data = df3, aes(x = subspecies, y = len, fill = subspecies)) +
# geom_boxplot()
df2 <- clean_df %>% group_by(species, len) %>% dplyr::summarise(v = n()) %>% as.data.frame
df3 <- data.frame()
# Instead of having a variable that counts how many N times a similar tract shows up,
# copy the row N times and remove that variable
for(i in 1:nrow(df2)) {
for(j in 1:df2[i,]$v) {
df3 <- rbind(df3, df2[i, 1:2])
}
}
ROH_expectation <- df3 %>% group_by(species) %>% summarise(mu = mean(len))
ggplot(data = df3, aes(x = len, y = ..density.., fill = species, col = species)) +
geom_histogram(alpha = 0.25) + facet_wrap(~ species) +
geom_vline(data = ROH_expectation,
mapping = aes(xintercept = mu)) +
theme_bw() +
scale_x_continuous(name = "ROH length (Mb)") +
theme(axis.text = element_text(size = 20),
axis.title = element_text(size = 20, margin = margin(0, 20, 0, 0)),
legend.text = element_text(size = 20),
legend.title = element_text(size = 25),
strip.text.x = element_text(size = 23)) +
guides(color = FALSE, fill = FALSE) +
labs(y = "Density")
# ggtitle("Density") +
# theme(plot.title = element_text(hjust = -0.1, vjust = -0.2))
#plot.margin = rep(grid::unit(0.75,"in"),4))
xList <- xtableList(list(ROH_expectation))
ggsave("figures/ROH.png", width = width, height = height)
homocigosity_table <- plyr::rbind.fill(lapply(X = 1:nrow(geno) %>% as.list,
FUN = function(i) data.frame(SNP = i,
het = tabulate(bin = homo[i,] %>% as.numeric %>% as.factor, nbins = 2)[1],
homo = tabulate(bin = homo[i,] %>% as.numeric %>% as.factor, nbins = 2)[2])))
homocigosity_table$chr <- bim_sorted$chr
homocigosity_table$chr %>% table
homocigosity_table$homocigosity <- homocigosity_table$homo / 63
df <- homocigosity_table %>% arrange(homocigosity)
df$id <- 1:nrow(geno)
ggplot(data = df,
mapping = aes(x = id, y = homocigosity)) +
geom_line()
homo[,63]
homo[,-63]
#df %>% ggplot(aes(x, y)) +
# geom_tile(aes(fill = z), width = 1) +
# scale_fill_manual(values = myPalette)