Examples of Use of rms Functions
Survival Analysis and Nomogram Graphing for Prostate Dataset
require(rms)
getHdata(prostate)
library(trellis)
attach(prostate)
bone <- factor(bm,labels=c("no mets","bone mets"))
dd <- datadist(age, ap, bone)
options(datadist="dd")
dtime[dtime==0] <- .5
S <- Surv(dtime, status!="alive")
f <- psm(S ~ rcs(age,3) + rcs(log(ap),5) + bone, dist="gauss")
a <- anova(f)
latex(a)
plot(a, pch=1)
s <- summary(f)
page(s)
plot(s, col=1:5, xfrac=.45, cex.t=.9)
latex(f)
srv <- Survival(f)
srv5 <- function(x) srv(5, x)
nomogram(f, ap=c(.1,.5,1,seq(5,30,by=5)), xfrac=.53,
fun=srv5, funlabel="5y Survival", cex.var=.8)
Nomogram for Various Predicted Values from Parametric Survival Model for SUPPORT Dataset
getHdata(support)
dd <- datadist(support)
options(datadist='dd')
attach(support)
label(meanbp3) <- 'Mean Blood Pressure'
label(wblc3) <- 'White Blood Cell Count/1000'
label(hrt3) <- 'Heart Rate'
label(temp3) <- 'Temperature (C)'
label(crea3) <- 'Creatinine'
label(alb3) <- 'Albumin'
f <- psm(Surv(d.time,death) ~ dzclass + rcs(age,3) + rcs(meanbp3,6) +
rcs(hrt3,5) + rcs(temp3,5), dist='gaussian')
survfun <- Survival(f)
surv1 <- function(lp) survfun(1, lp)
surv5 <- function(lp) survfun(5, lp)
quant <- Quantile(f)
med <- function(lp) quant(.5, lp)
bar <- Mean(f)
at.surv <- c(.01,.05,seq(.1,.9,by=.1),.95,.99)
at.med <- c(.25,.5,1,2,5,10,20,40)
n <- nomogram(f, hrt3=c(0,30,60,80,100,125,150,175,200,225,250),
fun=list(surv1, surv5, med),
lp=F,
funlabel=c("1y Survival Probability",
"5y Survival Probability",
"Median Survival Time, y"),
fun.at=list(at.surv, at.surv, at.med),
lp.at=seq(-4,9,length=100))
plot(n)
Nomogram for Simulated Exponential Survival Data with Non-monotonic and Interaction Effects
n <- 7000
set.seed(201)
age <- 50 + 12*rnorm(n)
label(age) <- "Age"
sex <- 1 + (runif(n)<=.4)
table(sex)
levels(sex) <- c("Male", "Female")
cens <- 15*runif(n)
systolic.bp <- rnorm(n, 120, 17.5) #8.5)
#h <- .02*exp(.025*(age-50)+.8*(sex==2)+.24*((systolic.bp-120)/8.5)^2)
# was .04*(age-50)
table(sex)
h <- .01*exp(.5*((.025-.009*(sex==2))*(age-50)+.8*(sex==2)+.075*
ifelse(systolic.bp<100,.9,1)*((systolic.bp-100)/8.5)^2))
ft <- -log(runif(n))/h
e <- ifelse(ft<=cens,1,0)
print(table(e))
ft <- pmin(ft, cens)
units(ft) <- "Year"
Srv <- Surv(ft, e)
ddist <- datadist(age,sex,systolic.bp)
options(datadist="ddist")
f <- cph(Srv ~ age*sex + rcs(systolic.bp,4), surv=T)
survfun <- Survival(f)
surv3 <- function(lp) survfun(3, lp)
surv5 <- function(lp) survfun(5, lp)
quant <- Quantile(f)
med <- function(lp) quant(.5, lp)
at.surv <- c(seq(.1,.9,by=.1),.95,.99)
at.med <- c(0,.5,1,1.5,seq(2,14,by=2))
plot(nomogram(f, conf.int=F, fun=list(surv3, surv5, med), varname.label=F,
funlabel=c("3y Survival Probability",
"5y Survival Probability",
"Median Survival Time, y"),
fun.at=list(at.surv, at.surv, at.med)))
Nomogram for Stratified Cox Model Fitted on Simulated Exponential Survival Data
n <- 2000
set.seed(1)
age <- 50 + 12*rnorm(n)
label(age) <- "Age"
sex <- 1 + (runif(n)<=.4)
levels(sex) <- c("Male", "Female")
cens <- 15*runif(n)
h <- .02*exp(.04*(age-50)+.8*(sex==2))
ft <- -log(runif(n))/h
e <- ifelse(ft<=cens,1,0)
print(table(e))
ft <- pmin(ft, cens)
units(ft) <- "Year"
Srv <- Surv(ft, e)
age.dec <- cut2(age, g=10, levels.mean=T)
label(age.dec) <- "Age"
dd <- datadist(age, sex, age.dec)
options(datadist="dd")
f.np <- cph(Srv ~ strat(age.dec)+strat(sex), surv=T)
# surv=T speeds up computations, and confidence limits when
# there are no covariables are still accurate.
print(f.np)
f.noia <- cph(Srv ~ rcs(age,4)+strat(sex), x=T, y=T)
# get accurate C.L. for any age
# Note: for evaluating shape of regression, we would not ordinarily
# bother to get 3-year survival probabilities - would just use X * beta
# We do so here to use same scale as nonparametric estimates
print(f.noia)
options(digits=3)
latex(f.noia, file='', inline=T)
anova(f.noia)
f.ia <- cph(Srv ~ rcs(age,4)*strat(sex), x=T, y=T, surv=T)
print(f.ia)
z <- latex(f.ia, file="",inline=T)
anova(f.ia)
if(TRUE) {
# setps(spline.age.sex.ia.nomogram, pointsize=9)
surv <- Survival(f.ia)
surv.f <- function(lp) surv(3, lp, stratum="sex=Female")
surv.m <- function(lp) surv(3, lp, stratum="sex=Male")
quant <- Quantile(f.ia)
med.f <- function(lp) quant(.5, lp, stratum="sex=Female")
med.m <- function(lp) quant(.5, lp, stratum="sex=Male")
at.surv <- c(.01,.05,seq(.1,.9,by=.1),.95,.98,.99,.999)
at.med <- c(0,.5,1,1.5,seq(2,14,by=2))
nom <- nomogram(f.ia, fun=list(surv.m,surv.f,med.m,med.f),
funlabel=c("S(3 | Male)","S(3 | Female)",
"Median (Male)","Median (Female)"),
fun.at=list(c(.8,.9,.95,.98,.99),c(.1,.3,.5,.7,.8,.9,.95,.98),
c(8,12),c(1,2,4,8,12)))
plot(nom, col.grid=FALSE, lmgp=.2)
# dev.off()
options(digits=3)
latex(f.ia, file="") #printed surv this time
}
# setps(km.age.sex)
plot(Predict(f.np, age.dec, sex, time=3, loglog=TRUE), ylim=c(-5,-1))
# setps(spline.age.sex.noia)
ages <- seq(20, 80, by=4) # Evaluate at fewer points. Default is 100
# For exact C.L. formula n=100 -> much memory
plot(Predict(f.noia, age=ages, sex, time=3, loglog=TRUE), ylim=c(-5,-1))
Bootstrap Areas under Cox Survival Curve Estimate
S <- Surv(1:10)
x <- runif(10)
x2 <- runif(10)
df <- data.frame(x=.5, x2=.5) #covariable settings for prediction
S
f <- coxph(S ~ x + x2)
s <- survfit(f, df, type="kaplan-meier", conf.type="none")
note("Point estimate of mean:")
trap.rule(s$time, s$surv)
#Note: This function is defined in the main library
#trap.rule <- function(x, y) sum(diff(x) * (y[-1] + y[ - length(y)]))/2
n <- nrow(S)
B <- 50
area <- double(B)
for(i in 1:B) {
cat(i,"")
u <- sample(1:n, n, replace=T)
# f <- cph(S ~ x, surv=T, type="kaplan-meier", subset=u)
# s <- survest(f, df, conf.int=F, type="kaplan-meier")
f <- coxph(S ~ x, subset=u)
s <- survfit(f, df, type="kaplan-meier", conf.type="none")
area[i] <- trap.rule(s$time, s$surv)
}
describe(area)
se <- sqrt(var(area))
z <- qnorm(.975)
c(mean(area)-z*se, mean(area)+z*se)
quantile(area, c(.025,.975))
z <- qnorm(.95)
c(mean(area)-z*se, mean(area)+z*se)
quantile(area, c(.05,.95))
par(mfrow=c(2,2), oma=c(3,0,3,0))
hist(area, nclass=20)
maxcount <- max(hist(area, nclass=20, plot=F)$counts)
z <- density(area)
#Scale density estimate to coincide with histogram
lines(z$x, maxcount*z$y/max(z$y))
title("Histogram and Kernel Density Estimate")
Ecdf(area); title("Empirical Cumulative Distribution")
z <- qqnorm(area)
abline(lsfit(z$x, z$y))
title("Q-Q Plot")
boxplot(area); title("Box Plot")
mtitle("Distribution of Estimates of Mean",ll=paste(B,"bootstrap repetitions"))
Longhand Calculation of Variance of Effects in Cox Model from Simulated Data, With Strata Interacting with Regular Predictor
This should be compared with the Design package's
contrast.Design
function.
trt <- sample(c("a","b","c"), 300, TRUE)
trt <- factor(trt)
age <- runif(300,30,80)
symp <- factor(sample(1:4, 300, TRUE))
S <- Surv(runif(300))
f <- cph(S ~ trt*(age + strat(symp)))
d <- expand.grid(trt=levels(trt), symp=levels(symp), age=seq(30,80,by=10))
x <- predict(f, d, type="x")
betas <- f$coef
cov <- f$var # may have to delete non-slope elements depending on model
i <- d$trt=="a"; xa <- x[i,]; Symp <- d$symp[i]; Age <- d$age[i]
i <- d$trt=="b"; xb <- x[i,]
i <- d$trt=="c"; xc <- x[i,]
doit <- function(xd, lab) {
xb <- xd %*% betas
se <- apply((xd %*% cov) * xd, 1, sum)^.5
q <- qnorm(1-.01/2) # 0.99 confidence limits
lower <- xb - q * se; upper <- xb + q * se
#Get hazards ratios instead of linear effects
xb <- exp(xb); lower <- exp(lower); upper <- exp(upper)
#First elements of these agree with
#summary(f, symp="1", age=30, conf.int=.99)
par(mfrow=c(2,2),oma=c(3,0,3,0))
for(sx in levels(Symp)) {
j <- Symp==sx
errbar(Age[j], xb[j], upper[j], lower[j], xlab="Age",
ylab=paste(lab,"Hazard Ratio"), ylim=c(0,5))
title(paste("Symptom Level",sx))
abline(h=1, lty=2)
}
mtext(paste(lab,"Hazard Ratios"), outer=T,cex=2.5)
}
doit(xb - xa, "B:A")
doit(xc - xa, "C:A")
doit(xb - xa, "C:B")
Use of summary.rms for Plotting All Pairwise Hazard Ratios among Three Treatments
Written by Charlotte Nelson 1995. Compare with
contrast.rms
.
# cadindex vector...
cadi <- sort(unique(cadindex[!is.na(cadindex)]))
# time and event variables for cph fit...
ftime <- d.time.u
event <- cdeath.u
# treatment labels...
treat <- factor(trt, labels=c("Med","PTCA","CABG"))
units(ftime) <- "Year"
# function to produce hazard ratio plots...
doit2 <- function(i,j) {
tr <- c("Medical","PTCA","CABG")
plot(cadi, rep(1, length(cadi)), ylim=c(.25,2.5), xlab="CAD Index",
ylab=paste(tr[j],":",tr[i]," Hazard Ratio"), type="n")
title(paste(tr[i],"vs.",tr[j]))
abline(h=1)
for(cad in cadi) {
k <- if(i==1 & j==2)4 else 6
z <- summary(f, treat=i, cadindex=cad, est.all=F,conf.int=.99)
print(z)
z <- z[k,c("Effect","Upper 0.99","Lower 0.99")]
note("Point estimates for hazard ratios and 99% confidence intervals")
print(z)
points(cad, z[1])
errbar(cad, z[1], z[2], z[3], add=T)
}
}
# cph fit...
f<-cph(Surv(ftime,event)~treat*pmax(cadindex,37)+ef60+
age+catg(mitins)+sex+vasdz+
chfi+comorbid+newmiuns+cpainvar+mplogit+mclogit+pclogit)
print(f)
anova(f)
# Call hazard ratio plot function...
sub<-"main population - 99% CI"
doit2(1,2)
mtext(side=1,line=0,cex=.8,outer=T,sub,adj=0)
doit2(1,3)
mtext(side=1,line=0,cex=.8,outer=T,sub,adj=0)
doit2(2,3)
mtext(side=1,line=0,cex=.8,outer=T,sub,adj=0)
General Approach to Drawing Nomograms from non-rms and non-Design fits
A general approach is to get predicted values (linear predictors) from whatever method you are using. Then run the rms package's (replacement
for Design package, or you can still use Design)
ols
function to fit the predicted values with an R2 of 1.0. Then use
nomogram
on the
ols
fit.
Add the argument sigma=1 to
ols
so you won't have a problem with a mean squared error of zero. Be sure that the right hand side of the
ols
model is the same as that used in the original model (e.g., from survey regression including nonlinear terms and interactions).
There is a nomogram example using this approach in
Regression Modeling Strategies under model approximation.
Making Nomograms When There are Interactions Involving only Some of the Levels of a Factor
Grant Izmirlian at
mailto:izmirlig@mail.nih.gov kindly provided
this code