-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathspecies_PCA.R
156 lines (137 loc) · 4.66 KB
/
species_PCA.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
source("/home/antortjim/MEGA/Master/PG/project/scripts/read_data.R")
subPlains <- c("boehmi", "borensis", "burchelli", "chapmani", "crawshayi", "grevys", "quagga")
# Eigen vector decomposition
# shit1 <- geno[,-(1:2)]
# shit1 <- select_SNPs(shit1)$geno
# E <- eigenstrat(shit1) # eigenstrat receives a matrix where rows = SNPs and columns = individuals
#E$vectors
# plot(1:61, abs(E$vectors[,1]))
# points(1:61, abs(E$vectors[,2]), pch = 19)
E <- eigenstrat(geno)
my_cols <- species
levels(my_cols) <- c("lightblue","Dark red","lightgreen")
# Plot PCA of all zebras
df <- data.frame(species = species,
subspecies = subspecies, idx = 1:63,
PCA1 = E$vectors[,1], PCA2 = E$vectors[,2])
ggplot(data = df, aes(x = PCA1, y = PCA2, col = species)) +
geom_point() +
scale_color_discrete(name = "Species",
breaks = c("grevys", "plains", "quagga"),
labels = c("Grevy's", "Plains", "Quagga")) +
theme(legend.text = element_text(size = 20),
legend.title = element_text(size = 25))
if (!interactive()) {
p
ggsave(filename = "figures/PCA_species.png",
plot = p,
height = height, width = width)
}
# rm(p)
# myData <- kde2d(df$PCA1,
# df$PCA2)
#
# x <- rep(myData$x, times = 25)
# y <- rep(myData$y, each = 25)
# z <- myData$z %>% as.numeric
# myResult <- data.frame(x = x, y = y, z = z)
#
# p <- ggplot(data = myResult, aes(x = x, y = y, z = z)) +
# geom_contour(bins = 2)
# p
#
# subsp <- c("boehmi", "borensis", "burchelli", "chapmani", "crawshayi")
# s <- "borensis"
#
# mySubset <- kde2d(filter(df, subspecies == s)$PCA1,
# filter(df, subspecies == s)$PCA2)
#
#
# x <- rep(mySubset$x, times = 25)
# y <- rep(mySubset$y, each = 25)
# z <- mySubset$z %>% as.numeric
# result1 <- data.frame(x = x, y = y, z = z)
#
# p <- ggplot(data = result, aes(x = x, y = y, z = z)) +
# geom_contour(bins = 2)
#
# p + layer(result1,
# mapping = aes(x = x, y = y, z = z),
# geom = "contour", params = list(bins =2)
#
# )
#
#
# for (s in subsp) {
# mySubset <- kde2d(filter(df, subspecies == s)$PCA1,
# filter(df, subspecies == s)$PCA2)
#
# x <- rep(df$x, times = 25)
# y <- rep(df$y, each = 25)
# z <- df$z %>% as.numeric
# result <- data.frame(x = x, y = y, density = z)
#
# p <- ggplot(data = result, aes(x = x, y = y, z = density)) +
# geom_contour(bins = 2)
# }
# p
#
#
#
# area <- p + geom_polygon(data = hulls, alpha = 0.5)
# area
## K means
# df <- full_lonlat %>% filter(species == "plains") %>% .[,c("lon", "lat")]
# clustering <- kmeans(df, centers = 5)
# temp <- cbind(cluster = clustering$cluster %>% as.factor,
# PCA1 = E$vectors[,1], PCA2 = E$vectors[,2],
# full_lonlat %>% filter(species == "plains"))
#
#
# hulls <- plyr::ddply(temp,
# "cluster",
# function(X) find_hull(df = X, x = "PCA1", y = "PCA2"))
#
#
# PCA_geography <- ggplot(data = temp,
# mapping = aes(x = PCA1, y = PCA2, fill = cluster, col = cluster)) +
# geom_point() +
# scale_fill_manual(values = myPalette) +
# scale_color_manual(values = myPalette) +
# geom_polygon(data = hulls,
# mapping = aes(x = PCA1, y = PCA2, col = cluster, fill = cluster),
# alpha = 0.5)
#
# hulls <- plyr::ddply(temp,
# "cluster",
# function(X) find_hull(df = X, x = "lon", y = "lat"))
#
# myAfrica <- ggmap(Africa) +
# geom_point(data = temp,
# mapping = aes(x = lon, y = lat, fill = cluster, col = cluster)) +
# scale_fill_manual(values = myPalette) +
# scale_color_manual(values = myPalette) +
# geom_polygon(data = hulls,
# mapping = aes(x = lon, y = lat, col = cluster, fill = cluster),
# alpha = 0.5) +
#
# theme(axis.line=element_blank(),axis.text.x=element_blank(),
# axis.text.y=element_blank(),axis.ticks=element_blank(),
# axis.title.x=element_blank(),
# axis.title.y=element_blank(),legend.position="none",
# panel.background=element_blank(),panel.border=element_blank(),panel.grid.major=element_blank(),
# panel.grid.minor=element_blank(),plot.background=element_blank())
#
# for (i in 1:length(fortified_countries)) {
# myAfrica <- myAfrica + geom_polygon(data = fortified_countries[[i]],
# aes(long, lat, group = group),
# color = "white", linetype = "longdash", size = 0.3, alpha = 0.5)
# }
#
#
# combined <- plot_grid(PCA_geography,
# myAfrica,
# labels = c("A", "B"),
# ncol = 2)
#
# combined