Prediction Error in Cox Models Varying Number of Predictors

This program runs Cox fits of simulated data with exponential survival and 1-30 covariables. Each covariable is U(-.5,.5) and has a true regression coefficient of .25. Estimates the average error (over simulations and over all subjects in sample) in predicted 5-yr survival compared to actual 5-year survival. Also estimates average absolute error for subjects in lowest decile of predicted survival. Intercept is chosen so that underlying survival (with all x=mean=0) is .75 at 5 years. Censoring distribution is U(0, 10y). Avg. # deaths is around 180.

This is useful for developing rules of thumb such as the "15:1 rule."
# FE Harrell 14Aug91
nsim <- 10
n <- 250
ncpts <- c(seq(1,29,by=2),30)
#nsim <- 1
#ncpts <- c(5,10)
s5 <- .75   #desired underlying 5-year survival
b0 <- log(-log(s5)/5)
xdc <- matrix(runif(n*40),nrow=n)-.5
meanabs <- NULL
meanlow <- NULL

for(ncov in ncpts) {
 cat('Using first',ncov,' covariables\n')

 x <- xdc[,1:ncov,drop=F]
 beta <- rep(.25,ncov)
 xbeta <- b0 + matxv(x, beta)
 surv5 <- exp(-5*exp(xbeta))   #True 5-yr survival for each subject
 describe(surv5)

 #Now take beta, simulate several samples from this using
 #exponential survival model, fit, and look at differences in
 #5-year survival from original xbeta

 meand <- single(nsim)
 meanl <- meand
 sumd <- 0
  for(i in 1:nsim) {

  #Simulate survival times with exponential survival function
  #S(t) = exp(-exp(xbeta)*t)

  ftime <- -log(runif(n)) /exp(xbeta)
  cens <- runif(n, 0, 10)   #Censoring is uniform(0, 10y)
  dead <- rep(1,n)
  dead[ftime>cens] <- 0
  sumd <- sumd + sum(dead)
  ftime <- pmin(ftime, cens)
  units(ftime) <- "Year"

  f <- coxph(x, ftime, dead, surv=T, time.inc=5, pr=F)
  if(i==1)print(f)
  b <- f$coef
  newxbeta <- matxv(x, b)
  pred.s5 <- (f$surv.summary[2,1,1])^exp(newxbeta-f$center)
  if(i==1)describe(pred.s5)
  d <- abs(pred.s5-surv5)
  meand[i] <- mean(d)
  k <- pred.s5 <= quantile(pred.s5, .1)
  meanl[i] <- mean(d[k])
  #cat(ncov,i,meand[i],meanl[i],"\n")
  }
 cat(ncov,mean(meand),mean(meanl),"\n")
 cat("Avg. # deaths:",sumd/nsim,"\n")

 meanabs <- c(meanabs,mean(meand))
 meanlow <- c(meanlow,mean(meanl))
}
print(cbind(ncpts,meanabs,meanlow))
plot(ncpts,meanlow,xlab="Number of Covariables Fitted",
     ylab="Average Error",type="n",ylim=range(c(meanlow,meanabs)))
points(ncpts,meanlow,type="b",lty=2)
points(ncpts,meanabs,type="b",lty=1)
title("Average Error in Cox 5-year Survival Estimates")
title(sub="Solid: all pts  Dashed: pts in first decile of predicted",
      cex=.7,adj=0)
title(sub="n=750",adj=1)
print(cbind(ncpts,meanabs,meanlow))
Topic revision: r1 - 11 Jul 2004, FrankHarrell
 

This site is powered by FoswikiCopyright © 2013-2022 by the contributing authors. All material on this collaboration platform is the property of the contributing authors.
Ideas, requests, problems regarding Vanderbilt Biostatistics Wiki? Send feedback