The following R function fits a Cox proportional hazards (PH) model, checks the PH assumption (using Schoenfeld residuals and the R function cox.zph), if PH violated corrects with stratification, and then bootstraps this entire model selection process to allow computation of valid confidence intervals of the predicted survival.
This function is a work in progress, only contains the bare minimums, and may have to be altered for a specific analysis.
Any questions or comments can be sent to Bryan Shepherd, bryan.shepherd@vanderbilt.edu. I would like to thank Thomas Dupont, Terri Scott, and Frank Harrell for their programming advice (although, of course, any bugs are my fault, not theirs). ArgumentsThis function does not compute a confidence interval for a hazard ratio. Such a confidence interval could be constructed by altering the function -- it is just hard to make this general because the hazard ratio can really only be computed if the variable of interest is pre-specified and stratification is not allowed to occur over that variable, or if specific values of the variable are specified over which to compute all hazard ratios. Also of note, in its current state, predictors must be modeled as linear functions (cannot expand with splines). This feature should change sometime soon, but it is currently above my R programming ability.
coxphboot<-function(formula,number.strata,ph.crit.val,surv.time,surv.x,Nboots) {
formula<-as.formula(formula) mod<-coxph(formula) express1<-as.character(formula) S<-eval(parse(text=express1[2])) z<-predict(mod, type='terms') f.short<-coxph(S ~ z) phtest<-survival::cox.zph(f.short, transform='identity')$table
badcov<-0 if(phtest[ncol(z)+1,3]<ph.crit.val) { badcov<-which(phtest[,3]==min(phtest[1:ncol(z),3])) }
m<-Design(model.frame(formula)) X<-m[,-1] classes<-attributes(m)$Design$assume.code Xnew<-X Xnew[1,]<-surv.x
if (badcov>0) { if (classes[badcov]==5) { stratnum<-X[,badcov] tmp1<-strsplit(express1, split=" ") tmp13[1+(badcov-1)*2]<-"strata(stratnum)" express3<-formula(paste(express1[2],express1[1],paste(tmp13,collapse=""))) mod<-coxph(express3) } if (classes[badcov]==1) { stratnum<-create.strata(X[,badcov],number.strata) express2<-"+strata(stratnum)" express3<-as.formula(paste(express1[2],express1[1],express1[3],express2)) mod<-coxph(express3) } }if (badcov==0) { stuff<-summary(survival::survfit(mod,newdata=Xnew[1,])) surv.est<-stuff$surv[stuff$time==max(stuff$time[stuff$time<surv.time])] } if (badcov>0) { if (classes[badcov]==5){ stuff<-summary(survival::survfit(mod,newdata=Xnew[1,][-badcov])) surv.strat<-surv.x[badcov] } if (classes[badcov]==1){ stuff<-summary(survival::survfit(mod,newdata=Xnew[1,])) surv.strat<-create.strata(c(X[,badcov],surv.x[badcov]),number.strata)[length(X[,badcov])+1] } surv.est<-stuff$surv[stuff$time==max(stuff$time[stuff$time<surv.time& stuff$strata==paste("stratnum=",as.character(surv.strat),sep="")])& stuff$strata==paste("stratnum=",as.character(surv.strat),sep="")] }
surv.estb<-badcovb<-NULL for (j in 1:Nboots) { cat(" Bootstrap: ", j, "\n")
N<-length(X[,1]) bootsamp<-sample(c(1:N),N,replace=TRUE)
yb<-S[,1][bootsamp] db<-S[,2][bootsamp] Sb<-Surv(yb,db) Xb<-X[bootsamp,1:ncol(X)] rownames(Xb)<-1:N Xb2 <- data.frame(cbind(S = Sb, Xb))
formula1<-as.formula(paste("Sb~",express1[3])) modb <- coxph(formula1, data=Xb2) z<-predict(modb, type='terms') f.short<-coxph(Sb ~ z) phtest<-survival::cox.zph(f.short, transform='identity')$table
badcov<-0 if(phtest[ncol(z)+1,3]<ph.crit.val) { badcov<-which(phtest[,3]==min(phtest[1:ncol(z),3])) }
if (badcov>0) { if (classes[badcov]==5) { Xb2$stratnum<-Xb[,badcov] tmp1<-strsplit(express1, split=" ") tmp13[1+(badcov-1)*2]<-"strata(stratnum)" express3<-formula(paste("Sb~",paste(tmp13,collapse=""))) modb<-coxph(express3, data=Xb2) } if (classes[badcov]==1) { Xb2$stratnum<-create.strata(Xb[,badcov],number.strata) express2<-"+strata(stratnum)" express3<-as.formula(paste("Sb~", express1[3], express2)) modb<-coxph(express3, data=Xb2) } }if (badcov==0) { stuff<-summary(survival::survfit(modb,newdata=Xnew[1,])) surv.estb[j]<-stuff$surv[stuff$time==max(stuff$time[stuff$time<surv.time])] } if (badcov>0) { if (classes[badcov]==5){ stuff<-summary(survival::survfit(modb,newdata=Xnew[1,][-badcov])) surv.strat<-surv.x[badcov] } if (classes[badcov]==1){ stuff<-summary(survival::survfit(modb,newdata=Xnew[1,])) surv.strat<-create.strata(c(Xb[,badcov],surv.x[badcov]),number.strata)[length(Xb[,badcov])+1] } surv.estb[j]<-stuff$surv[stuff$time==max(stuff$time[stuff$time<surv.time& stuff$strata==paste("stratnum=",as.character(surv.strat),sep="")])& stuff$strata==paste("stratnum=",as.character(surv.strat),sep="")] }
tmp1<-strsplit(express1, " ") badcovb[j]<-badcov if (badcov>0) { badcovb[j]<-tmp13[1+(badcov-1)*2] } } return(surv.est,surv.estb,badcovb) }N<-2000
beta0<- -0.18 beta1<- -0.04 beta2<- 0.006 beta3<- 0.53 v<-0.693 lambda<-0.0312 yvar<-matrix(c(1.7943670, 7.475836, -0.7879104, 7.4758359, 43.013806, -1.2723471, -0.7879104, -1.272347, 64.7582702),3,3) ybar<-c(3.898058, 15.022914, 38.746296)
desmatrix <- rmvnorm(N, mean=ybar, sigma=yvar) v1<-desmatrix[,1] v2<-desmatrix[,2] v3<-desmatrix[,3] v4<-rbinom(N,1,1/(1+exp(.49+.143*v1-.00159*v2-.029*v3)))
u<-runif(N,0,1) t<-(-log(u)/(lambda*exp(beta0*v1+beta1*v2+beta2*v3+beta3*v4)))^(1/v) c<-rbeta(N,1.3,1)*120 y<-ifelse(t<c,t,c) d<-ifelse(t<c,1,0)
S<-Surv(y,d) v4<-factor(v4)
modstuff<-coxphboot(formula="S~v1+v2+v3+v4",number.strata=4,ph.crit.val=.1,surv.time=60,surv.x=c(4,15,38,1),Nboots=200)
modstuff$surv.est ## Estimate of survival at t=60, v1=4, v2=15, v3=38, and v4=1. quantile(modstuff$surv.estb,c(.025,.975)) ## 95% confidence interval table(modstuff$badcovb) ## Table of the frequency of stratification for each variable