-
Notifications
You must be signed in to change notification settings - Fork 0
/
BlogFeedbackLinearRegression.R
78 lines (57 loc) · 1.5 KB
/
BlogFeedbackLinearRegression.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
library(caret)
library(ggplot2)
# Load the training data. The file has no header row, so read.csv() names the
# columns V1, V2, ... automatically. file.path() builds the path with a
# separator that works on every platform; a literal "\\" only works on Windows.
# NOTE(review): `sample` masks base::sample() as a variable name; the name is
# kept because later statements in this script refer to it.
sample <- read.csv(
  file.path("BlogFeedback", "blogData_train.csv"),
  header = FALSE
)
colnames(sample)

# Descriptive statistics for columns 51 to 54.
summary(sample[51:54])
# vapply() guarantees exactly one numeric value per column, unlike sapply(),
# whose return type depends on its input.
vapply(sample[51:54], sd, numeric(1))

#install.packages("moments")
library(moments)
# Per-column skewness. Iterating over the columns directly avoids apply()'s
# coercion of the data frame to a matrix.
skew <- vapply(sample[51:54], skewness, numeric(1))
print(skew)

# Pairwise correlations between the same four columns.
correlations <- cor(sample[51:54])
print(correlations)
# Work on columns 51 to 54 only.
sample2 <- sample[51:54]
column_titles <- names(sample2)

# One drawing routine per plot type; each draws the panel for column `i` and
# titles it with that column's name.
panel_drawers <- list(
  function(i) hist(sample2[, i], main = column_titles[i]),
  function(i) plot(density(sample2[, i]), main = column_titles[i]),
  function(i) boxplot(sample2[, i], main = column_titles[i])
)

# For every plot type: start a fresh 1 x 4 layout, then draw the four columns
# side by side (histograms first, then densities, then box plots).
for (draw_panel in panel_drawers) {
  par(mfrow = c(1, 4))
  for (i in 1:4) {
    draw_panel(i)
  }
}
library(caret)
library(ggplot2)
# Install corrplot only when it is missing. An unconditional install.packages()
# re-downloads the package on every run and can fail in a non-interactive
# session that has no CRAN mirror configured.
if (!requireNamespace("corrplot", quietly = TRUE)) {
  install.packages("corrplot")
}
library(corrplot)

# Switch back to a single-panel layout before drawing the correlation matrix.
par(mfrow = c(1, 1))
corrplot(correlations, method = "circle")
# Load the training file and the separate test file. Neither has a header row,
# so the columns are named V1, V2, ... by read.csv(). file.path() keeps the
# paths portable across operating systems.
train <- read.csv(
  file.path("BlogFeedback", "blogData_train.csv"),
  header = FALSE
)
test <- read.csv(
  file.path("BlogFeedback", "blogData_test-2012.02.01.00_00.csv"),
  header = FALSE
)
summary(train)

# Draw 70% of the training rows, partitioned on V281 (the response used by the
# models below). The draw is random; call set.seed() first if a reproducible
# split is needed.
idx <- createDataPartition(train$V281, p = 0.7, list = FALSE)

# `idx` holds row numbers of `train`, so both pieces must be cut from `train`:
# the selected rows form the fitting set, the remaining rows the hold-out set.
# Applying -idx to `test` (a different file with its own row numbering) would
# drop an arbitrary subset of unrelated rows.
trn <- train[idx, ]
tst <- train[-idx, ]
# Overlay the V281 density of `trn` and `tst`.
# Constants written inside aes() are treated as data: a fill of "red" becomes a
# category label coloured from the default palette, and alpha = 0.4 gets its
# own scale and legend. So alpha is passed as a fixed layer parameter, and fill
# is mapped to the data-set name with the colours assigned explicitly.
ggplot(mapping = aes(x = V281)) +
  geom_density(aes(fill = "trn"), data = trn, alpha = 0.4) +
  geom_density(aes(fill = "tst"), data = tst, alpha = 0.4) +
  scale_fill_manual(name = "data", values = c(trn = "red", tst = "blue")) +
  # Zoom without discarding observations outside the window.
  coord_cartesian(xlim = c(0, 50), ylim = c(0, 2)) +
  theme_gray()
# Ordinary least squares: V281 regressed on every other column of `trn`.
model <- lm(formula = V281 ~ ., data = trn)
summary(model)

# Predict on `tst` and report the root-mean-squared error of the predictions.
p <- predict(object = model, newdata = tst)
error <- p - tst[["V281"]]
mean_squared_error <- mean(error^2)
sqrt(mean_squared_error)
# Resampling scheme: 10-fold cross-validation, logging progress per fold.
cv_control <- trainControl(method = "cv", number = 10, verboseIter = TRUE)

# Refit the linear model through caret on the full `train` data frame so the
# error estimate comes from cross-validation. This overwrites `model`.
model <- train(V281 ~ ., data = train, method = "lm", trControl = cv_control)
model