-
Notifications
You must be signed in to change notification settings - Fork 0
/
Titanic_Ensemble.R
156 lines (123 loc) · 11.5 KB
/
Titanic_Ensemble.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
# Reading the Train and Test Data
# Both CSV files are read from fixed absolute paths and have a header row.
# stringsAsFactors = TRUE is passed explicitly: since R 4.0.0 read.csv() leaves
# text columns as character by default, while the tree() formulas further down
# use Sex as a predictor (presumably a text column in the CSV - confirm), which
# needs to be a factor. On older R versions this matches the former default.
Titanic <- read.csv("D:\\PGD Data Science\\Term 2\\Machine Learning\\Kaggle\\Titanic Prediction\\Titanic_Train.csv",
                    header = TRUE, stringsAsFactors = TRUE)
Titanic.Kaggle <- read.csv("D:\\PGD Data Science\\Term 2\\Machine Learning\\Kaggle\\Titanic Prediction\\Titanic_Test.csv",
                           header = TRUE, stringsAsFactors = TRUE)
# Packages
library(e1071)
# Survived As Factor
# The response is converted to a factor so the models below treat it as a
# class label rather than a number.
Titanic$Survived <- as.factor(Titanic$Survived)
# Sampling
# Draw 70% of the row indices without replacement. The seed is fixed so the
# split, and therefore every accuracy printed below, is the same on each run.
set.seed(42)
Sample.Titanic <- sample.int(n = nrow(Titanic), size = floor(0.7 * nrow(Titanic)),
                             replace = FALSE)
# Train and Test Data
# Sampled rows form the training split; all remaining rows form the test split.
Titanic.Train <- Titanic[Sample.Titanic, ]
Titanic.Test <- Titanic[-Sample.Titanic, ]
# Train Without Labels
# The response is removed by name instead of by position, so the result does
# not depend on Survived being the second column of the file.
Titanic.Train.Features <- Titanic.Train[, setdiff(names(Titanic.Train), "Survived"),
                                        drop = FALSE]
# Test Without Labels
Titanic.Test.Features <- Titanic.Test[, setdiff(names(Titanic.Test), "Survived"),
                                      drop = FALSE]
# Train Labels
Titanic.Train.Labels <- Titanic.Train[["Survived"]]
# Test Labels
Titanic.Test.Labels <- Titanic.Test[["Survived"]]
### Decision Tree Model ###
# Packages
library(tree)

# Decision Tree Model 1: Survived explained by Sex and Age, fitted on the
# training split.
Titanic_DT_1 <- tree(Survived ~ Sex + Age,
                     data = Titanic.Train)
# Class predictions of tree 1 for the test split; shown and then stored as a
# new column of the test split.
Titanic_DT_1.Prediction <- predict(Titanic_DT_1,
                                   newdata = Titanic.Test,
                                   type = "class")
Titanic_DT_1.Prediction
Titanic.Test$DT1_Prediction <- Titanic_DT_1.Prediction
# Confusion matrix of tree 1: predicted class (rows) against the actual
# Survived value of the test split (columns).
Titanic_DT1.ConfMat <- table(Titanic_DT_1.Prediction, reference = Titanic.Test$Survived)
Titanic_DT1.ConfMat
# Accuracy of tree 1: diagonal (matching) counts divided by all counts.
Titanic_DT1.Accuracy <- sum(diag(Titanic_DT1.ConfMat)) / sum(Titanic_DT1.ConfMat)
Titanic_DT1.Accuracy

# Decision Tree Model 2: same as model 1 with Pclass added as a predictor.
Titanic_DT_2 <- tree(Survived ~ Pclass + Sex + Age,
                     data = Titanic.Train)
# Class predictions of tree 2 for the test split; shown and then stored as a
# new column of the test split.
Titanic_DT_2.Prediction <- predict(Titanic_DT_2,
                                   newdata = Titanic.Test,
                                   type = "class")
Titanic_DT_2.Prediction
Titanic.Test$DT2_Prediction <- Titanic_DT_2.Prediction
# Confusion matrix of tree 2, laid out like the one for tree 1.
Titanic_DT2.ConfMat <- table(Titanic_DT_2.Prediction, reference = Titanic.Test$Survived)
Titanic_DT2.ConfMat
# Accuracy of tree 2: diagonal (matching) counts divided by all counts.
Titanic_DT2.Accuracy <- sum(diag(Titanic_DT2.ConfMat)) / sum(Titanic_DT2.ConfMat)
Titanic_DT2.Accuracy
### Naive Bayes Model###
# Fitted on the training split with every other column as a predictor.
Titanic_NB <- naiveBayes(Survived ~ .,
                         data = Titanic.Train)
# Class predictions for the label-free copy of the test split.
Titanic_NB.Prediction <- predict(Titanic_NB,
                                 newdata = Titanic.Test.Features,
                                 type = "class")
# Confusion matrix: predicted class (rows) against the test labels (columns).
Titanic_NB.ConfMat <- table(Titanic_NB.Prediction, Titanic.Test.Labels)
# Keep the predictions next to the other models' columns in the test split.
Titanic.Test$NB_Prediction <- Titanic_NB.Prediction
Titanic_NB.ConfMat
# Accuracy: diagonal (matching) counts divided by all counts.
Titanic_NB.Accuracy <- sum(diag(Titanic_NB.ConfMat)) / sum(Titanic_NB.ConfMat)
Titanic_NB.Accuracy
# Taking Majority of Predictions
# Each of the three models casts one vote per row of the test split. A row is
# labelled 1 when at least two of the three votes are 1 and 0 otherwise, which
# is the same outcome as enumerating all eight 0/1 combinations by hand.
# A row with a missing vote gets NA.
Majority.Votes <- (Titanic_DT_1.Prediction == 1) +
  (Titanic_DT_2.Prediction == 1) +
  (Titanic_NB.Prediction == 1)
Titanic.Test$PredictionMajority <- as.factor(ifelse(Majority.Votes >= 2, 1, 0))
# Majority Values into a variable
Titanic.TrainMajority <- Titanic.Test$PredictionMajority
# Confusion Matrix of the majority vote: predicted class (rows) against the
# actual Survived value of the test split (columns).
Titanic_Majority.ConfMat <- table(Titanic.TrainMajority,
                                  reference = Titanic.Test$Survived)
Titanic_Majority.ConfMat
# Accuracy of the majority vote: share of rows whose vote equals the label.
# Labels are compared as text so the result does not depend on both factors
# carrying the same set of levels.
Titanic_Majority.Accuracy <- mean(as.character(Titanic.TrainMajority) ==
                                    as.character(Titanic.Test$Survived))
Titanic_Majority.Accuracy
# Final models: each one is refitted on the complete labelled data set (train
# and test split together) and then used to classify the rows of the Kaggle
# test file.

# Decision Tree 1 (Sex + Age)
Titanic.Model_DT1 <- tree(Survived ~ Sex + Age,
                          data = Titanic)
Titanic.Prediction_DT1 <- predict(Titanic.Model_DT1,
                                  newdata = Titanic.Kaggle,
                                  type = "class")

# Decision Tree 2 (Pclass + Sex + Age)
Titanic.Model_DT2 <- tree(Survived ~ Pclass + Sex + Age,
                          data = Titanic)
Titanic.Prediction_DT2 <- predict(Titanic.Model_DT2,
                                  newdata = Titanic.Kaggle,
                                  type = "class")

# Naive Bayes (all other columns as predictors)
Titanic.Model_NB <- naiveBayes(Survived ~ .,
                               data = Titanic)
Titanic.Prediction_NB <- predict(Titanic.Model_NB,
                                 newdata = Titanic.Kaggle,
                                 type = "class")

# Store the three sets of predictions as extra columns of the Kaggle data.
# This happens only after all predictions are made, so every model sees the
# Kaggle data without the added columns.
Titanic.Kaggle$Model1_DT <- Titanic.Prediction_DT1
Titanic.Kaggle$Model2_DT <- Titanic.Prediction_DT2
Titanic.Kaggle$Model3_NB <- Titanic.Prediction_NB
# Majority rule for the Test File
# Each of the three final models casts one vote per row of the Kaggle data.
# A row is labelled 1 when at least two of the three votes are 1 and 0
# otherwise, which is the same outcome as enumerating all eight 0/1
# combinations by hand. A row with a missing vote gets NA.
Kaggle.Votes <- (Titanic.Prediction_DT1 == 1) +
  (Titanic.Prediction_DT2 == 1) +
  (Titanic.Prediction_NB == 1)
Titanic.Kaggle$MajorityPrediction <- ifelse(Kaggle.Votes >= 2, 1, 0)
# New Data Frame
# Keep the first column of the Kaggle data and the majority-vote column. The
# vote column is picked by name rather than by position 15, so the selection
# stays correct if the input file has a different number of columns.
# NOTE(review): the first column is presumably the passenger identifier, and a
# Kaggle submission may require the prediction column to be named "Survived" -
# confirm before uploading; the header is left unchanged here.
Final.Prediction <- Titanic.Kaggle[, c(names(Titanic.Kaggle)[1], "MajorityPrediction")]
# Writing as CSV File
# Written to the current working directory without a row-name column.
write.csv(Final.Prediction, file = "Titanic_Ensemble_Prediction.csv", row.names = FALSE)