R Code Used in Analyzing German Breast Cancer Data

library(Design) Load(brca) latex(describe(brca)) datadensity(brca) pred <- ~chemo+hormon+x1+x2+x3+x4+x5+x6+x7 dd <- datadist(brca); options(datadist='dd') attach(brca) plot(varclus(pred)) par(mfrow=c(3,3)) w <- transcan(pred, data=brca) par(mfrow=c(1,1)) summary(w) library(lattice) bwplot(x2 ~ x1, panel=panel.bpplot, xlab=label(x1)) stripplot(x2 ~ x1) xYplot(sqrt(x6) ~ sqrt(x7)) rectime <- rectime/365.25 units(rectime) <- 'Year' S <- Surv(rectime, censrec=='recurrence')

g <- function(x) round(rcorr.cens(x, S)['Dxy'],3) xs <- paste('x', 1:7, sep='') dxy <- sapply(xs, function(z) g(get(z))); names(dxy) <- xs dxy dotchart2(-sort(-dxy), xlab=expression(D[xy]), reset.par=TRUE)

a <- aregImpute(~x1+x2+x3+x4+x5+x6+x7+rectime*censrec, n.impute=5,x=TRUE) tevent <- rectime*(censrec=='recurrence') a <- aregImpute(~x1+x2+x3+x4+x5+x6+x7+rectime + censrec + tevent, n.impute=5,x=TRUE)

d <- brca; d$tevent <- tevent f <- fit.mult.impute(S ~ chemo+hormon+rcs(x1,5)+x2+rcs(x3,5)+x4+rcs(x5,5)+rcs(x6,5)+rcs(x7,5), cph, a, tol=1e-13, data=d) plot(anova(f))

# Compare with dataset used in JRSSA article which excluded cases with NAs Load(breastcancer) g <- cph(Surv(rectime, censrec=='recurrence') ~ hormon+rcs(x1,5)+x2+rcs(x3,5)+x4+rcs(x5,5)+rcs(x6,5)+rcs(x7,5), tol=1e-13, data=breastcancer) par(mfrow=c(2,1)); plot(anova(f), main='Multiply Imputed') plot(anova(g), main='Casewise Deletion (published)') par(mfrow=c(1,1))

f <- cph(S ~ rcs(x1,5) + x2 + x3 + x4 + rcs(x5,5) + rcs(x6,4) + x7 + chemo + hormon, tol=1e-13) f$stats latex(anova(f)) f <- cph(S ~ pol(x1,2) + x2 + x3 + x4 + sqrt(x5) + sqrt(x6) + sqrt(x7) + hormon, x=TRUE, y=TRUE) zph <- cox.zph(f, transform='identity') par(mfrow=c(3,4)) plot(zph, resid=FALSE) par(mfrow=c(1,1)) fastbw(f) f <- cph(S ~ pol(x1,2) + sqrt(x5) + sqrt(x6) + hormon, x=TRUE, y=TRUE) cox.zph(f)

f <- cph(S ~ rcs(x1,5) + x2 + x3 + x4 + rcs(sqrt(x5),5) + rcs(sqrt(x6),4) + sqrt(x7) + hormon + chemo, surv=TRUE, time.inc=3, x=TRUE, y=TRUE) v <- validate(f, B=100, dxy=TRUE) options(digits=3) v pred3 <- survest(f, linear.predictors=f$linear.predictors, times=3)$surv cal <- calibrate(f, B=40, u=3, cuts=quantile(pred3, (1:5)/6)) plot(cal)

f <- fit.mult.impute(S ~ rcs(x1,5) + x2 + x3 + x4 + rcs(sqrt(x5),5) + rcs(sqrt(x6),4) + sqrt(x7) + hormon + chemo, cph, a, data=d, surv=TRUE)

f anova(f) plot(anova(f)) par(mfrow=c(3,3)) plot(f, ref.zero=TRUE, ylim=c(-1.5,1.5)) par(mfrow=c(1,1)) plot(f, x1=NA) plot(summary(f, vnames='labels'), log=TRUE) latex(f)

surv <- Survival(f) quant <- Quantile(f) quantile(rectime[censrec=='censored']) mn <- Mean(f, tmax=5) surv2 <- function(x) surv(times=2, lp=x) surv5 <- function(x) surv(times=5, lp=x) med <- function(x) quant(lp=x) nomogram(f, fun=list('2y Survival'=surv2, '5y Survival'=surv5, 'Median Lifelength'=med, 'Mean Restricted Life'=mn))
Topic revision: r1 - 09 Jul 2006, FrankHarrell
 

This site is powered by FoswikiCopyright © 2013-2022 by the contributing authors. All material on this collaboration platform is the property of the contributing authors.
Ideas, requests, problems regarding Vanderbilt Biostatistics Wiki? Send feedback