forked from ruthkeogh/superlearner_survival_tutorial
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfunctions_pseudoobs.R
More file actions
167 lines (127 loc) · 4.81 KB
/
functions_pseudoobs.R
File metadata and controls
167 lines (127 loc) · 4.81 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
#################################################################################
#################################################################################
#Super learner for time-to-event outcomes tutorial
#
#Method: pseudo observations super learner
#Sachs et al. 2019
#
#Implemented using superlearner, and making use of Michael Sach's code from https://github.com/sachsmc/pseupersims
#This file provides Sach's functions from his utils.R file,
#with minor modifications to remove the handling of competing events.
#################################################################################
#' Compute the time-varying cause-specific ROC curve using pseudo-observations
#'
#' @param X Vector of predictions
#' @param pseu Pseudo observations at a fixed time for the cause of interest
#'
calc_roc <-
function(X, pseu) {
## compute roc curve using pseudo values
cuts <- sort(unique(X))
cutin <- cuts[-c(1, length(cuts))]
res.0 <- rbind(c(1, 1), t(sapply(cutin, function(cth) {
c(sum(pseu * as.numeric(X > cth)) / sum(pseu),
sum((1 - pseu) * as.numeric(X > cth)) / sum(1 - pseu))
})), c(0, 0))
res <- as.data.frame(res.0)
res <- data.frame(lapply(res, function(x)
pmax(pmin(x, 1), 0)))
colnames(res) <- c("tpf", "fpf")
res
}
timedep_roc <- function(delta, Tout, MM, t.star = 10){
d.cox <- 1.0 * (delta == 1)
fit.m <- coxph(Surv(Tout, d.cox) ~ MM)
R.t <- Tout > t.star
risk <- predict(fit.m, type = "risk")
W.t <- sum(R.t * risk)
pi.k <- (R.t * risk) / W.t
cc.all <- sort(MM)
TP.t <- sapply(cc.all, function(cc) {
sum((MM > cc) * pi.k)
})
FP.t <- sapply(cc.all, function(cc) {
sum((MM > cc) * R.t) / sum(R.t)
})
data.frame(tpf = TP.t, fpf = FP.t)
}
calc_auc <- function(fpf, tpf) {
n <- length(fpf)
dx <- fpf[-n] - fpf[-1]
mid.y <- (tpf[-n] + tpf[-1]) / 2
sum(dx * mid.y)
}
calc_pAUC <- function(predictions, labels) {
if(length(unique(predictions)) < 3) return(.5)
inroc <- calc_roc(predictions, labels)
calc_auc(inroc$fpf, inroc$tpf)
}
#' Optimize the time varying AUC
#'
#' @param Z Matrix of predictions
#' @param Y Pseudo values for the cumulative incidence for the competing event of interest at a fixed time
optimize_auc <-
function(Z, Y) {
## Z is a matrix of predictions, Y are pseudo-observations
## compute the linear combination of Z that maximizes the AUC
tomax <- function(beta) {
beta <- beta
Xin <- ( Z %*% beta)
roc <- calc_roc(Xin, Y)
(1 - (calc_auc(roc$fpf, roc$tpf))) + 100 * sum(abs(beta))
}
trag <- optim(rep(0.01, ncol(Z)), tomax, method = "BFGS")
if (trag$convergence == 0) {
trag$par / sum(trag$par)
} else {
stop("Optimization of AUC failed to converge")
}
}
my.SuperLearner.control <- function(saveFitLibrary = TRUE, trimLogit = 0.001, ...) {
if(trimLogit > 0.5) {
warning('trimLogit must be less than 0.5, will replace with trimLogit = 0.001')
trimLogit <- 0.001
}
list(saveFitLibrary = saveFitLibrary, saveCVFitLibrary = FALSE, trimLogit = trimLogit, ...)
}
assignInNamespace("SuperLearner.control", my.SuperLearner.control, "SuperLearner")
#' Pseudo AUC SuperLearner method
#'
#' To be passed to the SuperLearner function
#'
#' @export
method.pseudoAUC <- function() {
out <- list(
# require allows you to pass a character vector with required packages
# use NULL if no required packages
require = NULL,
# computeCoef is a function that returns a list with three elements:
# 1) coef: the weights (coefficients) for each algorithm
# 2) cvRisk: the V-fold CV risk for each algorithm
# 3) optimizer: (optional) the result object from the optimization of the weights.
computeCoef = function(Z,
Y,
libraryNames,
obsWeights,
control,
verbose,
...) {
cvRisk <- apply(Z[control$timedex, ], 2, function(x) {
rocin <- calc_roc(x, Y[control$timedex])
1 - (calc_auc(rocin$fpf, rocin$tpf))
})
coef <- tryCatch(optimize_auc(Z[control$timedex, ], Y[control$timedex]), error = function(e) rep(NA, ncol(Z)),
warning = function(e) rep(NA, ncol(Z)))
out <- list(cvRisk = cvRisk,
coef = coef,
optimizer = NULL)
return(out)
},
# computePred is a function that takes the weights and the predicted values from each algorithm in the library and combines them based on the model to output the super learner predicted values
computePred = function(predY, coef, control, ...) {
out <- crossprod(t(predY), coef)
return(out)
}
)
invisible(out)
}