-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathLASSO_On_Selected_Data.R
92 lines (74 loc) · 3.11 KB
/
LASSO_On_Selected_Data.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#-----------------------------------------------------------------------------
#RUN GLM ON ALL DATA - OUR BENCHMARK FOR BEST PERFORMANCE
x <- model.matrix(outcome~., data = SelectedData)[,-1]
idx <- 1:(dim(SelectedData)[2] - 1) #omit outcome column
cv.lasso <- cv.glmnet(x,SelectedData$outcome, alpha = 1,
data = SelectedData[,idx],
nfolds = insidefolds,
family = "binomial")
StatModel <- glmnet(x, SelectedData$outcome, alpha = 1,
family = "binomial",
lambda = cv.lasso$lambda.min) #Optimal lambda
#Setup prediction set on test data
x.test <- model.matrix(outcome ~., SelectedDataOutcome)[,-1]
probabilities <- StatModel %>% predict(newx = x.test, type = "response")
predicted.classes <- ifelse(probabilities > 0.5, 1, 0)
conf_matrix <- table(predicted.classes,SelectedDataOutcome$outcome)
colnames(conf_matrix) <- c(0,1) #rename so sensitivity & specificity functions work
sink(paste(save_path,'GLM_Summary', SelectedData_str,'.txt', sep = ''))
# Summary
print('Full Model Summary')
print(summary(StatModel))
print('Events per Variable')
print(eventsnumber/ (dim(SelectedData)[2] - 1) )
# Examine odds ratios and 95% CIs
print('Model Coefficients Odds')
modelcoefs <- exp(coef(StatModel))
print(exp(modelcoefs))
print('Model Coefficients CIs')
#print(exp(confint.default(StatModel)))
print('N/A')
# Model accuracy
print('Accuracy')
out_acc <- mean(predicted.classes == SelectedDataOutcome$outcome)
print(out_acc)
print('Sensitivity')
out_sen <- sensitivity(conf_matrix)
out_sen <- out_sen[,'.estimate'][[1]]
print(out_sen)
print('Specificity')
out_spec <- specificity(conf_matrix)
out_spec <- out_spec[,'.estimate'][[1]]
print(out_spec)
print('Brier Score')
#This function only works form glm objects need to manually calculate
#out_brier <- BrierScore(StatModel)
f_t <- predicted.classes
o_t <- SelectedDataOutcome$outcome
out_brier = mean(((f_t) - o_t)^2)
print(out_brier)
# Plot ROC curve and find AUC
roccurve3 <- roc(outcome ~ c(probabilities), data = SelectedDataOutcome)
out_auc <- auc(roccurve3)
out_auc <- out_auc[1]
print('AUC')
print(out_auc)
sink()
ggroc(roccurve3, legacy.axes = T) +
geom_abline(slope = 1 ,intercept = 0) + # add identity line
theme(
panel.background = element_blank(),
axis.title.x = element_text(size = 18, face = 'bold'),
axis.title.y = element_text(size = 18, face = 'bold'),
panel.border = element_rect(size = 2, fill = NA),
axis.text.x = element_text(size = 14, face = 'bold'),
axis.text.y = element_text(size = 14, face = 'bold')) +
xlab('1 - Specificity') +
ylab('Sensitivity') +
scale_x_continuous(breaks = seq(0,1,0.25), labels = seq(0,1,0.25)) +
scale_y_continuous(breaks = seq(0,1,0.25), labels = seq(0,1,0.25)) +
geom_text(x = 0.1, y = 1, colour = "black", size = 6,
label = paste('AUC: ', sprintf("%0.2f",out_auc), sep = '') )
ggsave(paste(save_path, 'LASSO_', SelectedData_str,'_ROC.pdf', sep = ''),device = 'pdf',
width = 20, height = 20, units = 'cm', dpi = 300)
#-----------------------------------------------------------------------------