-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathmake_samples.R
46 lines (32 loc) · 1.06 KB
/
make_samples.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# Setup: attach the packages and load the two data sets used below.
#
# One-time install, if the data package is missing:
#   install.packages("ElemStatLearn")
# NOTE(review): ElemStatLearn appears to have been archived on CRAN; if the
# call above fails, install it from the CRAN archive -- TODO confirm.
#
# The workspace is deliberately NOT cleared here (no rm(list = ls())):
# wiping the global environment would destroy every object already built by
# the session or script that sources this file.
library(ElemStatLearn) # provides the `prostate` and `spam` data sets
library(magrittr) # provides the %>% pipe
library(dplyr) # data-frame verbs: filter(), select(), mutate()

# Name the package explicitly so data() cannot pick up a same-named data set
# from another attached package or from a local "data" directory.
data("prostate", package = "ElemStatLearn")
data("spam", package = "ElemStatLearn")
# Continuous outcome ----
# The prostate data carries its own logical `train` column: keep the rows on
# each side of that flag, then drop the flag itself from both pieces.
prostate.train <- select(filter(prostate, train), -train)
prostate.test <- select(filter(prostate, !train), -train)

# For each piece: `lcavol` is the response vector, and every remaining column
# is collected into a predictor matrix.
y.train <- prostate.train[["lcavol"]]
X.train <- as.matrix(select(prostate.train, -lcavol))
y.test <- prostate.test[["lcavol"]]
X.test <- as.matrix(select(prostate.test, -lcavol))
# Categorical outcome ----
# Random train/test split of the spam data. Every row is independently marked
# as training with probability `train.prop`, so the realised training share is
# only approximately that proportion.
# NOTE: no seed is set in this script, so the split differs from run to run
# unless the caller fixes the RNG state before this point.
train.prop <- 0.66
n <- nrow(spam)
train.ind <- sample(
  c(TRUE, FALSE),
  size = n,
  replace = TRUE,
  prob = c(train.prop, 1 - train.prop)
)

# Rows flagged TRUE form the training set; the rest form the test set.
spam.train <- spam[train.ind, ]
spam.test <- spam[!train.ind, ]

# Response (the `spam` column) and predictor matrix (all other columns).
y.train.spam <- spam.train[["spam"]]
X.train.spam <- as.matrix(select(spam.train, -spam))
y.test.spam <- spam.test[["spam"]]
X.test.spam <- as.matrix(select(spam.test, -spam))

# Same data with the response recoded as numeric: 1 where the label equals
# "spam", 0 otherwise. It is split with the same indicator as above.
spam.dummy <- mutate(spam, spam = as.numeric(spam == "spam"))
spam.train.dummy <- spam.dummy[train.ind, ]
spam.test.dummy <- spam.dummy[!train.ind, ]