Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
mannau committed May 3, 2019
0 parents commit 3e4a722
Show file tree
Hide file tree
Showing 2 changed files with 4,170 additions and 0 deletions.
50 changes: 50 additions & 0 deletions bmarketing.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
library(tidyverse)

################# Loading data into the environment #################
# read.csv2() reads a delimited file using ';' as the field separator and ','
# as the decimal mark.
# stringsAsFactors = TRUE is set explicitly: the default flipped to FALSE in
# R 4.0.0, and with character columns summary() only reports
# Length/Class/Mode instead of per-level counts. Pinning it keeps text
# columns (including the target `y`) as factors on every R version.
bmarketing <- read.csv2("bank.csv", stringsAsFactors = TRUE)

# First look at the data set: str() shows the column types, summary() gives
# a per-column overview.
str(bmarketing)
summary(bmarketing)

# Row-level missing-value check:
# na.omit() drops every row that contains at least one NA, so if the row
# count is unchanged afterwards the data frame holds no missing values.
# (Cell-by-cell alternative: is.na(bmarketing))
newdata <- na.omit(bmarketing)
nrow(bmarketing) == nrow(newdata)

# Column-level check for a single column, here the target column `y`:
# report whether it contains any missing value. anyNA() answers this
# directly, without building an index vector and testing its length.
if (anyNA(bmarketing$y)) {
  print("Missing Value found in the specified column")
} else {
  print("All okay: No Missing Value found in the specified column")
}

# Per-column summary of the data set (ranges of the individual variables).
summary(bmarketing)

## ------------------------------------------------------------------------
# Histogram of `age` (30 bins) with a red vertical line at the median age.
ggplot(data = bmarketing) +
  geom_histogram(mapping = aes(age), bins = 30) +
  geom_vline(mapping = aes(xintercept = median(age)), color = "red")

# TODO: draw a boxplot for each variable
# boxplot(duration~y,data=bmarketing_sub,col="red")

#################Decision Tree#################
library(rpart)
library(rpart.plot)
library(caret)

# Fit a decision tree for `y` using every other column as a predictor,
# then draw the fitted tree and print its detailed summary.
dt_model <- rpart(formula = y ~ ., data = bmarketing)
rpart.plot(dt_model)
summary(dt_model)

################# Testing Decision Tree #################
# Predict the class label for every row. Note that the predictions are made
# on the same data frame the tree was fitted on, so the figures below are
# in-sample results.
predictions <- predict(dt_model, newdata = bmarketing, type = "class")

## Accuracy: share of rows whose predicted label equals the observed `y`
mean(predictions == bmarketing$y)

# Confusion matrix: predicted labels (rows) against observed labels (columns)
table(predictions, bmarketing$y)
Loading

0 comments on commit 3e4a722

Please sign in to comment.