-
Notifications
You must be signed in to change notification settings - Fork 1
/
4a_coxProportionalHazard.R
77 lines (59 loc) · 2.84 KB
/
4a_coxProportionalHazard.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
################################################
# Submission FlowCAP IV #
# Step 4a - Cox Proportional Hazard regression #
################################################
FCSloc <- "/group/irc/shared/data/FlowCAPIV"
#############################################
# Load the required libraries and functions #
#############################################
library(survival)
fitModel <- function(survivalTime,status,features){
data <- list(time=survivalTime,status=status,x=features)
fit <- coxph(Surv(time,status)~x, data)
return(fit)
}
######################
# Read the meta data #
######################
meta_patients <- read.csv(paste(FCSloc,"MetaDataTrain.csv",sep="/"),as.is=TRUE)
nPatients<-nrow(meta_patients)
train_ids <- 1:191
train_survivalTime <- meta_patients[train_ids,"Survival.Time"]
train_survivalTime <- train_survivalTime - min(train_survivalTime) + 1 # To ensure all survival times are positive
train_status <- meta_patients[train_ids,"Status"]
##############################
# Read the selected features #
##############################
features <- read.csv(paste(FCSloc,"dambi","bestUncorrelatedFeatures.csv",sep="/"),as.is=TRUE)
rownames(features) <- features[,1]
features <- as.matrix(features[,-1])
selected_features <- 1:13 # We varied this number to find the optimal value
######################################
# Try leave one out cross-validation #
######################################
# Make a prediction for each sample in the training set
prediction <- NULL
print("Training_instance Survival_Time Leave-one-out_prediction")
for (i in train_ids) {
fold <- setdiff(train_ids,i)
fit <- fitModel(train_survivalTime[fold],train_status[fold],features[fold,selected_features])
toPredict <- list(time=train_survivalTime[i],status=train_status[i],x=features[i,selected_features,drop=F])
prediction <- c(prediction,predict(fit,toPredict))
print(paste(i,train_survivalTime[i],prediction[i]))
}
# Evaluate the result
fit <- fitModel(train_survivalTime,train_status,prediction)
pvalue <- summary(fit)$logtest[3]
cindex <- summary(fit)$concordance
correlation <- cor(prediction[train_status==1], train_survivalTime[which(train_status==1)])
# Print evaluation measures
print(paste("p value: ",pvalue," c-index: ",cindex[1]," correlation: ",correlation,sep=""))
#################################
# Generate results for test set #
#################################
fit <- fitModel(train_survivalTime,train_status,features[train_ids,selected_features])
toPredict <- list(time=rep(NA,383),status=rep(NA,383),x=features[,selected_features])
prediction <- predict(fit,toPredict)
meta_patients[,"Prediction"] <- prediction
write.csv(meta_patients,file=paste(FCSloc,"dambi","predictionCoxPH.csv",sep="/"))
print(paste("Predictions on training and test set are written to ",paste(FCSloc,"dambi","predictionCoxPH.csv",sep="/"),sep=""))