-
Notifications
You must be signed in to change notification settings - Fork 16
/
Copy pathkaggle_setup.R
76 lines (60 loc) · 1.76 KB
/
kaggle_setup.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
## kaggle_setup.R
## data import script
## connects to sqlite db
## pulls in dataframes
## August 25 2012
rm(list=ls())
setwd("~/Work/Kaggle/Data/")
library("RSQLite")
m = dbDriver("SQLite")
con <- dbConnect(m, dbname = "compDataAsSQLiteDB/compData.db")
# get ptprofile query
mq1 <- dbSendQuery(con, "select * from ptprofile")
md1 <- fetch(mq1, n = -1)
dbHasCompleted(mq1)
dbClearResult(mq1)
md1 = md1[,-c(6,7,9,11,13)]
names(md1)[6:9]= c("count.condition", "count.medication", "count.diagnosis", "count.transcript")
# get condition table
mq2 <- dbSendQuery(con, "select * from condition")
condition.df <- fetch(mq2, n = -1)
dbHasCompleted(mq2)
dbClearResult(mq2)
# get diagnosis table
mq3 <- dbSendQuery(con, "select * from training_diagnosis")
diagnosis.df <- fetch(mq3, n = -1)
dbHasCompleted(mq3)
dbClearResult(mq3)
# get smoking status query
q1 <- dbSendQuery(con, "select * from training_smoke")
d1 <- fetch(q1, n = -1)
dbHasCompleted(q1)
dbClearResult(q1)
# get patient transcripts
q2 <- dbSendQuery(con, "select * from training_patientTranscript")
d2 <- fetch(q2, n = -1)
dbHasCompleted(q2)
dbClearResult(q2)
d2$Height = as.numeric(d2$Height)
d2$dmIndicator = as.logical(d2$dmIndicator)
# get labs
q3 <- dbSendQuery(con, "select * from training_labs")
d3 <- fetch(q3, n = -1)
dbHasCompleted(q3)
dbClearResult(q3)
# get meds
q4 <- dbSendQuery(con, "select * from training_allMeds")
d4 <- fetch(q4, n = -1)
dbHasCompleted(q4)
dbClearResult(q4)
# get patient
q5 <- dbSendQuery(con, "select * from training_patient")
d5 <- fetch(q5, n = -1)
dbHasCompleted(q5)
dbClearResult(q5)
# clean up
dbDisconnect(con)
setwd("~/Work/Kaggle/")
write.csv(d3, file="Data/d3.csv", row.names=FALSE)
write.csv(d4, file="Data/d4.csv", row.names=FALSE)
write.csv(d5, file="Data/d5.csv", row.names=FALSE)