(r1) CategorizingContinuousCovariates < Main < Vanderbilt Biostatistics Wiki

You are here: Vanderbilt Biostatistics Wiki>Main Web>Seminars>RClinic>CategorizingContinuousCovariates (revision 1)~~EditAttach~~

library(Design)

x <- csv.get("data.csv", header=TRUE) # Data set is the one used in Puzzler 1 # --> data was simulated such that there is a nonlinear # interaction between sex and age

# MODEL TREATING ALL CONTINUOUS VARIABLES CONTINUOUS ff <- lrm(dz ~ sex*rcs(age, 4) + rcs(sysbp, 4), data=x) anova(ff) # Total D.F. = 10 # sex*age interaction is significant (p < 0.05)

# CATEGORIZE THE CONTINUOUS VARIABLES x <- upData(x, agecut = cut(age, breaks=quantile(subset(age, dz==0), probs=c(0, 0.25, 0.50, 0.75, 1), na.rm=TRUE)), sysbpcut = cut(sysbp, breaks=quantile(subset(sysbp, dz==0), probs=c(0, 0.25, 0.50, 0.75, 1), na.rm=TRUE)) )

# NOTE: agecut, sysbpcut, are factors ==> need them as factors

# MODEL TREATING ALL CONTINUOUS VARIABLES AS CATEGORICAL fcut <- lrm(dz ~ sex*agecut + sysbpcut, data=x) anova(fcut) # TOTAL D.F. = 10 # sex*age interaction no longer significant (p > 0.05)

# PLOT THE PREDICTED EFFECT OF AGE ACROSS SEX IN EACH MODEL dd <- datadist(x) options (datadist = 'dd') par(mfrow=c(1,2)) plot(ff, age=NA, sex=NA, conf.int=FALSE) plot(fcut, age=NA, sex=NA, conf.int=FALSE)

# VIEW WHAT'S ACTUALLY BEING PLOTTED print.plot.Design(plot(ff, age=NA, sex=NA, conf.int=FALSE)) # Adjusted to medians: sysbp = 120 print.plot.Design(plot(fcut, age=NA, sex=NA, conf.int=FALSE)) # Adjusted to most frequent level: sysbpcut=(117, 128]

# MAKE SURE BOTH MODELS ARE ADJUSTED TO THE SAME THINGS # --> median and percentile that spans the median # i.e., if fcut wasn't adjusted to the quantile that spanned 120, # would use: # par(mfrow=c(1,2)) # plot(ff, age=NA, sex=NA, conf.int=FALSE) # plot(fcut, age=NA, sex=NA, conf.int=FALSE, # sysbpcut="(117,128]")

# PUT THE TWO PLOTS TOGETHER plff <- plot(ff, age=NA, sex=NA, conf.int=FALSE) plfcut <- plot(fcut, age=NA, sex=NA, conf.int=FALSE)

par (mfrow =c(1,1), mar=c(5,5,4,2)+0.1, lwd=2) # First plot the spline regression results: age across sex plot(ff, age=NA, sex=NA, conf.int=FALSE, # ylim = range(c(range(plff$x.xbeta"log odds"), # range(plfcut$x.xbeta"log odds"))), ylim = c(-2, 2), label.curves=FALSE, lty=1, col=c("gray", "black")) # Now plot the categorical regression results: age across sex Fstep <- stepfun(x=with(x, quantile(subset(age, dz==0), prob=c(0.25, 0.50, 0.75), na.rm=TRUE)), y = subset(plfcut$x.xbeta, sex=="female")$"log odds") par(new=TRUE, col="gray", lty=2) lines(Fstep, xlim=c(0, 150)) Mstep <- stepfun(x=with(x, quantile(subset(age, dz==0), prob=c(0.25, 0.50, 0.75), na.rm=TRUE)), y = subset(plfcut$x.xbeta, sex=="male")$"log odds") par(new=TRUE, col="black", lty=2) lines(Mstep, xlim=c(0, 150)) par(new=TRUE, col="black", lty=1, lwd=1) legend(x=40, y=2, # y=max(c(range(plff$x.xbeta"log odds"), # range(plfcut$x.xbeta"log odds"))), xjust=0, yjust=1, legend = c("Female - Spline Regression", "Male - Spline Regression", "Female - Categorical Regression", "Male - Categorical Regression"), lty=rep(1:2, each=2), col = rep(c("gray", "black"), 2), lwd=2)

Topic revision: r1 - 08 Nov 2006, TheresaScott

Main

Department Home Page

Biostatistics Graduate Program

Vanderbilt University Medical Center

Biostatistics Webs
- Archive
- Main
- Sandbox
- System

Copyright © 2013-2022 by the contributing authors. All material on this collaboration platform is the property of the contributing authors.
Ideas, requests, problems regarding Vanderbilt Biostatistics Wiki? Send feedback