You are here: Vanderbilt Biostatistics Wiki>Main Web>StatComp>RS>Design>DesignExamples (26 May 2010, FrankHarrell)EditAttach

Examples of Use of rms Functions

Survival Analysis and Nomogram Graphing for Prostate Dataset
Nomogram for Various Predicted Values from Parametric Survival Model for SUPPORT Dataset
Nomogram for Simulated Exponential Survival Data with Non-monotonic and Interaction Effects
Nomogram for Stratified Cox Model Fitted on Simulated Exponential Survival Data
Bootstrap Areas under Cox Survival Curve Estimate
Longhand Calculation of Variance of Effects in Cox Model from Simulated Data, With Strata Interacting with Regular Predictor
Use of summary.rms for Plotting All Pairwise Hazard Ratios among Three Treatments
General method for making nomograms from non-rms and non-Design model fits
Making Nomograms with Incomplete Interactions

Survival Analysis and Nomogram Graphing for Prostate Dataset

require(rms)
getHdata(prostate)
library(trellis)
attach(prostate)

bone <- factor(bm,labels=c("no mets","bone mets"))
dd <- datadist(age, ap, bone)
options(datadist="dd")

dtime[dtime==0] <- .5
S <- Surv(dtime, status!="alive")

f <- psm(S ~ rcs(age,3) + rcs(log(ap),5) + bone, dist="gauss")
a <- anova(f)
latex(a)
plot(a, pch=1)

s <- summary(f)
page(s)
plot(s, col=1:5, xfrac=.45, cex.t=.9)

latex(f)

srv <- Survival(f)
srv5 <- function(x) srv(5, x)
nomogram(f, ap=c(.1,.5,1,seq(5,30,by=5)), xfrac=.53,
    fun=srv5, funlabel="5y Survival", cex.var=.8)

Nomogram for Various Predicted Values from Parametric Survival Model for SUPPORT Dataset

getHdata(support)
dd <- datadist(support)
options(datadist='dd')

attach(support)
label(meanbp3)   <- 'Mean Blood Pressure'
label(wblc3)   <- 'White Blood Cell Count/1000'
label(hrt3)       <- 'Heart Rate'
label(temp3)   <- 'Temperature (C)'
label(crea3)   <- 'Creatinine'
label(alb3)       <- 'Albumin'

f <- psm(Surv(d.time,death) ~ dzclass + rcs(age,3) + rcs(meanbp3,6) +
         rcs(hrt3,5) + rcs(temp3,5), dist='gaussian')

survfun <- Survival(f)
surv1   <- function(lp) survfun(1, lp)
surv5   <- function(lp) survfun(5, lp)
quant   <- Quantile(f)
med     <- function(lp) quant(.5, lp)
bar     <- Mean(f)

at.surv <- c(.01,.05,seq(.1,.9,by=.1),.95,.99)
at.med  <- c(.25,.5,1,2,5,10,20,40)

n <- nomogram(f, hrt3=c(0,30,60,80,100,125,150,175,200,225,250),
         fun=list(surv1, surv5, med), 
         lp=F,
         funlabel=c("1y Survival Probability",
                    "5y Survival Probability",
                    "Median Survival Time, y"),
         fun.at=list(at.surv, at.surv, at.med),
         lp.at=seq(-4,9,length=100))
plot(n)

Nomogram for Simulated Exponential Survival Data with Non-monotonic and Interaction Effects

n <- 7000
set.seed(201)

age <- 50 + 12*rnorm(n)
label(age) <- "Age"
sex <- 1 + (runif(n)<=.4)
table(sex)
levels(sex) <- c("Male", "Female")
cens <- 15*runif(n)
 
systolic.bp <- rnorm(n, 120, 17.5)  #8.5)
#h <- .02*exp(.025*(age-50)+.8*(sex==2)+.24*((systolic.bp-120)/8.5)^2)
# was .04*(age-50)
table(sex)
h <- .01*exp(.5*((.025-.009*(sex==2))*(age-50)+.8*(sex==2)+.075*
             ifelse(systolic.bp<100,.9,1)*((systolic.bp-100)/8.5)^2))

ft <- -log(runif(n))/h
e <- ifelse(ft<=cens,1,0)
print(table(e))
ft <- pmin(ft, cens)
units(ft) <- "Year"
Srv <- Surv(ft, e)
ddist <- datadist(age,sex,systolic.bp)
options(datadist="ddist")

f <- cph(Srv ~ age*sex + rcs(systolic.bp,4), surv=T)

survfun <- Survival(f)
surv3   <- function(lp) survfun(3, lp)
surv5   <- function(lp) survfun(5, lp)
quant   <- Quantile(f)
med     <- function(lp) quant(.5, lp)
at.surv <- c(seq(.1,.9,by=.1),.95,.99)
at.med  <- c(0,.5,1,1.5,seq(2,14,by=2))

plot(nomogram(f, conf.int=F, fun=list(surv3, surv5, med), varname.label=F,
         funlabel=c("3y Survival Probability",
                    "5y Survival Probability",
                    "Median Survival Time, y"),
         fun.at=list(at.surv, at.surv, at.med)))

Nomogram for Stratified Cox Model Fitted on Simulated Exponential Survival Data

n <- 2000
set.seed(1)
age <- 50 + 12*rnorm(n)
label(age) <- "Age"
sex <- 1 + (runif(n)<=.4)
levels(sex) <- c("Male", "Female")
cens <- 15*runif(n)
h <- .02*exp(.04*(age-50)+.8*(sex==2))
ft <- -log(runif(n))/h
e <- ifelse(ft<=cens,1,0)
print(table(e))
ft <- pmin(ft, cens)
units(ft) <- "Year"
Srv <- Surv(ft, e)
 
age.dec <- cut2(age, g=10, levels.mean=T)
label(age.dec) <- "Age"
dd <- datadist(age, sex, age.dec)
options(datadist="dd")
 
f.np <- cph(Srv ~ strat(age.dec)+strat(sex), surv=T)
# surv=T speeds up computations, and confidence limits when
# there are no covariables are still accurate.
print(f.np)
 
f.noia <- cph(Srv ~ rcs(age,4)+strat(sex), x=T, y=T)
# get accurate C.L. for any age
# Note: for evaluating shape of regression, we would not ordinarily
# bother to get 3-year survival probabilities - would just use X * beta
# We do so here to use same scale as nonparametric estimates
print(f.noia)
options(digits=3)
latex(f.noia, file='', inline=T)

anova(f.noia)
 
f.ia <- cph(Srv ~ rcs(age,4)*strat(sex), x=T, y=T, surv=T)
print(f.ia)
z <- latex(f.ia, file="",inline=T)
anova(f.ia)
if(TRUE) {
  # setps(spline.age.sex.ia.nomogram, pointsize=9)
  surv <- Survival(f.ia)
  surv.f <- function(lp) surv(3, lp, stratum="sex=Female")
  surv.m <- function(lp) surv(3, lp, stratum="sex=Male")
  quant <- Quantile(f.ia)
  med.f <- function(lp) quant(.5, lp, stratum="sex=Female")
  med.m <- function(lp) quant(.5, lp, stratum="sex=Male")
  at.surv <- c(.01,.05,seq(.1,.9,by=.1),.95,.98,.99,.999)
  at.med <- c(0,.5,1,1.5,seq(2,14,by=2))
  nom <- nomogram(f.ia, fun=list(surv.m,surv.f,med.m,med.f),
           funlabel=c("S(3 | Male)","S(3 | Female)",
                      "Median (Male)","Median (Female)"),
           fun.at=list(c(.8,.9,.95,.98,.99),c(.1,.3,.5,.7,.8,.9,.95,.98),
                       c(8,12),c(1,2,4,8,12)))
  plot(nom,  col.grid=FALSE, lmgp=.2)
  # dev.off()
 
  options(digits=3)
  latex(f.ia, file="")        #printed surv this time
}
 
# setps(km.age.sex)
plot(Predict(f.np, age.dec, sex, time=3, loglog=TRUE), ylim=c(-5,-1))
 
# setps(spline.age.sex.noia)
ages <- seq(20, 80, by=4)   # Evaluate at fewer points. Default is 100
                            # For exact C.L. formula n=100 -> much memory
plot(Predict(f.noia, age=ages, sex, time=3, loglog=TRUE), ylim=c(-5,-1))

Bootstrap Areas under Cox Survival Curve Estimate

S <- Surv(1:10)
x <- runif(10)
x2 <- runif(10)

df <- data.frame(x=.5, x2=.5)  #covariable settings for prediction
S

f <- coxph(S ~ x + x2)
s <- survfit(f, df, type="kaplan-meier", conf.type="none")
note("Point estimate of mean:")
trap.rule(s$time, s$surv)

#Note: This function is defined in the main library
#trap.rule <- function(x, y) sum(diff(x) * (y[-1] + y[ - length(y)]))/2

n <- nrow(S)
B <- 50
area <- double(B)
for(i in 1:B) {
   cat(i,"")
   u <- sample(1:n, n, replace=T)
#   f <- cph(S ~ x, surv=T, type="kaplan-meier", subset=u)
#   s <- survest(f, df, conf.int=F, type="kaplan-meier")
   f <- coxph(S ~ x, subset=u)
   s <- survfit(f, df, type="kaplan-meier", conf.type="none")   
   area[i] <- trap.rule(s$time, s$surv)
   }

describe(area)
se <- sqrt(var(area))
z <- qnorm(.975)
c(mean(area)-z*se, mean(area)+z*se)
quantile(area, c(.025,.975))
z <- qnorm(.95)
c(mean(area)-z*se, mean(area)+z*se)
quantile(area, c(.05,.95))

par(mfrow=c(2,2), oma=c(3,0,3,0))
hist(area, nclass=20)
maxcount <- max(hist(area, nclass=20, plot=F)$counts)
z <- density(area)
#Scale density estimate to coincide with histogram
lines(z$x, maxcount*z$y/max(z$y))
title("Histogram and Kernel Density Estimate")

Ecdf(area); title("Empirical Cumulative Distribution")

z <- qqnorm(area)
abline(lsfit(z$x, z$y))
title("Q-Q Plot")

boxplot(area); title("Box Plot")
mtitle("Distribution of Estimates of Mean",ll=paste(B,"bootstrap repetitions"))

Longhand Calculation of Variance of Effects in Cox Model from Simulated Data, With Strata Interacting with Regular Predictor

This should be compared with the Design package's contrast.Design function.

trt <- sample(c("a","b","c"), 300, TRUE)
trt <- factor(trt)
age <- runif(300,30,80)
symp <- factor(sample(1:4, 300, TRUE))

S <- Surv(runif(300))

f <- cph(S ~ trt*(age + strat(symp)))

d <- expand.grid(trt=levels(trt), symp=levels(symp), age=seq(30,80,by=10))
x <- predict(f, d, type="x")
betas <- f$coef
cov   <- f$var    # may have to delete non-slope elements depending on model

i <- d$trt=="a"; xa <- x[i,]; Symp <- d$symp[i]; Age <- d$age[i]
i <- d$trt=="b"; xb <- x[i,]
i <- d$trt=="c"; xc <- x[i,]

doit <- function(xd, lab) {
  xb <- xd %*% betas
  se <- apply((xd %*% cov) * xd, 1, sum)^.5
  q <- qnorm(1-.01/2)   # 0.99 confidence limits
  lower <- xb - q * se; upper <- xb + q * se
  #Get hazards ratios instead of linear effects
  xb <- exp(xb); lower <- exp(lower); upper <- exp(upper)
  #First elements of these agree with 
  #summary(f, symp="1", age=30, conf.int=.99)
  par(mfrow=c(2,2),oma=c(3,0,3,0))
  for(sx in levels(Symp)) {
    j <- Symp==sx
    errbar(Age[j], xb[j], upper[j], lower[j], xlab="Age", 
           ylab=paste(lab,"Hazard Ratio"), ylim=c(0,5))
    title(paste("Symptom Level",sx))
    abline(h=1, lty=2)
    }
  mtext(paste(lab,"Hazard Ratios"), outer=T,cex=2.5)
  }

doit(xb - xa, "B:A")
doit(xc - xa, "C:A")
doit(xb - xa, "C:B")

Use of summary.rms for Plotting All Pairwise Hazard Ratios among Three Treatments

Written by Charlotte Nelson 1995. Compare with contrast.rms.

# cadindex vector...
cadi <- sort(unique(cadindex[!is.na(cadindex)]))

# time and event variables for cph fit...
ftime <- d.time.u
event <- cdeath.u

# treatment labels...
treat <- factor(trt, labels=c("Med","PTCA","CABG"))

units(ftime) <- "Year"

# function to produce hazard ratio plots...
doit2 <- function(i,j) {
 tr <- c("Medical","PTCA","CABG")
 plot(cadi, rep(1, length(cadi)), ylim=c(.25,2.5), xlab="CAD Index",
      ylab=paste(tr[j],":",tr[i]," Hazard Ratio"), type="n")
title(paste(tr[i],"vs.",tr[j]))
abline(h=1)

for(cad in cadi) {
  k <- if(i==1 & j==2)4 else 6
  z <- summary(f, treat=i, cadindex=cad, est.all=F,conf.int=.99)
  print(z)
  z <- z[k,c("Effect","Upper 0.99","Lower 0.99")]
  note("Point estimates for hazard ratios and 99% confidence intervals")
  print(z)
  points(cad, z[1])
  errbar(cad, z[1], z[2], z[3], add=T)
  }
 }
# cph fit...
f<-cph(Surv(ftime,event)~treat*pmax(cadindex,37)+ef60+
            age+catg(mitins)+sex+vasdz+
            chfi+comorbid+newmiuns+cpainvar+mplogit+mclogit+pclogit)
print(f)
anova(f)

# Call hazard ratio plot function...
sub<-"main population - 99% CI"
doit2(1,2)
mtext(side=1,line=0,cex=.8,outer=T,sub,adj=0)
doit2(1,3)
mtext(side=1,line=0,cex=.8,outer=T,sub,adj=0)
doit2(2,3)
mtext(side=1,line=0,cex=.8,outer=T,sub,adj=0)

General Approach to Drawing Nomograms from non-rms and non-Design fits

A general approach is to get predicted values (linear predictors) from whatever method you are using. Then run the rms package's (replacement for Design package, or you can still use Design) ols function to fit the predicted values with an R2 of 1.0. Then use nomogram on the ols fit. Add the argument sigma=1 to ols so you won't have a problem with a mean squared error of zero. Be sure that the right hand side of the ols model is the same as that used in the original model (e.g., from survey regression including nonlinear terms and interactions).

There is a nomogram example using this approach in Regression Modeling Strategies under model approximation.

Making Nomograms When There are Interactions Involving only Some of the Levels of a Factor

Grant Izmirlian at mailto:izmirlig@mail.nih.gov kindly provided this code

Topic attachments
I	Attachment	Action	Size	Date	Who	Comment
R	fixup-nom.R	manage	9.7 K	26 May 2010 - 16:24	FrankHarrell	Nomogram adaptation from Grant Izmirlian

Topic revision: r4 - 26 May 2010, FrankHarrell

Main

Department Home Page

Biostatistics Graduate Program

Vanderbilt University Medical Center

Biostatistics Webs
- Archive
- Main
- Sandbox
- System

Copyright © 2013-2022 by the contributing authors. All material on this collaboration platform is the property of the contributing authors.
Ideas, requests, problems regarding Vanderbilt Biostatistics Wiki? Send feedback