-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathbmarketing.R
49 lines (37 loc) · 1.4 KB
/
bmarketing.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
library(tidyverse)
# Load data ----
# read.csv2() expects a semicolon-separated file with "," as the decimal mark.
bmarketing <- read.csv2(file = "bmarketing.csv")

# First look at the data: column types, then per-column summaries.
str(bmarketing)
summary(bmarketing)

# Quick missing-value check: na.omit() drops every row that contains an NA,
# so an unchanged row count means the data frame has no missing values.
newdata <- na.omit(bmarketing)
identical(nrow(newdata), nrow(bmarketing))
# Targeted missing-value check for a single column (here the outcome `y`).
# anyNA() answers the yes/no question directly; the previous
# length(which(...) > 0) form only worked because a non-zero length is truthy.
if (anyNA(bmarketing$y)) {
  print("Missing Value found in the specified column")
} else {
  print("All okay: No Missing Value found in the specified column")
}
# Per-column summary; for numeric columns this includes min and max (the range).
summary(bmarketing)

## ------------------------------------------------------------------------
# Histogram of age with the median marked by a red vertical line.
# The median is passed as a fixed xintercept rather than mapped inside aes():
# a constant mapped through aes() is recycled over the data, so one identical
# line would be drawn for every row instead of a single reference line.
# na.rm = TRUE keeps the marker available even if some ages are missing.
age_median <- median(bmarketing$age, na.rm = TRUE)
ggplot(bmarketing, aes(x = age)) +
  geom_histogram(bins = 30) +
  geom_vline(xintercept = age_median, color = "red")
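# TODO: draw a boxplot of each numeric variable against the outcome y, e.g.:
# boxplot(duration~y,data=bmarketing_sub,col="red")
# (bmarketing_sub is not defined in this script; substitute the data frame to plot)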
#################Decision Tree#################
library(rpart)
library(rpart.plot)

# Fit a classification tree predicting y from all other columns.
# method = "class" is stated explicitly instead of being inferred from the
# type of y: the evaluation step calls predict(type = "class"), which needs
# a classification tree.
dt_model <- rpart(y ~ ., data = bmarketing, method = "class")

# Draw the fitted tree, then print the detailed summary of the fit.
rpart.plot(dt_model)
summary(dt_model)
#################Testing Decision Tree #################
# NOTE: the model is scored on the same data frame it was fitted on, so the
# figures below are in-sample (training) results, not held-out test results.
predictions <- predict(dt_model, newdata = bmarketing, type = "class")

## Compute the accuracy: share of rows whose predicted class equals y
mean(bmarketing$y == predictions)

# Confusion matrix with labelled dimensions: rows are the predicted classes,
# columns are the observed values of y.
table(predicted = predictions, actual = bmarketing$y)