.First <- function(...){ library(Hmisc,T) library(Design,T) invisible() } .First() # will be automatic next time start S+ data.restore('/tmp/dumpdata.sdd') objects() # Renamed fev data frame to FEV using Object Explorer # (or FEV <- fev; rm(fev)) # Assignment 1 Prob. 6 plot(varclus(~.,data=FEV)) ecdf(~ age | smoke, groups=sex, data=FEV) bwplot(smoke ~ fev | sex, panel=panel.bpplot, data=FEV) plot(summary(height ~ age + sex + smoke, data=FEV)) xYplot(height ~ age | smoke, groups=sex, panel=panel.plsmo, lwd=4, data=FEV) # Prob. 7 xYplot(fev ~ age | sex, groups=smoke, panel=panel.plsmo, data=FEV) # Prob. 8 xYplot(fev ~ height | smoke, method='quantile',data=FEV) # Prob. 9 f <- ols(fev ~ age + height + sex + smoke, data=FEV) f options(width=70) anova(f) plot(anova(f),what='partial') plot.lm(f) # Prob. 11 attach(FEV) d <- expand.grid(age=12, smoke='non-current smoker', height=c(63,68), sex=levels(sex)) pred <- predict(f, d, conf.int=.95) cbind(d, pred) # Prob. 12 f <- ols(fev ~ sex*(height + age) + smoke) f options(width=120) an <- anova(f) print(an, 'dots') plot(an) # Assignment 3 detach(2) data.restore('/tmp/support.sdd') dim(support) attach(support[!is.na(support$totcst) & support$totcst > 0,]) describe(totcst) # Assignment 4 (end of chapter 7) g <- function(y) c(Mean=mean(y), Median=median(y)) s <- summary(totcst ~ age + sex + dzgroup + race + meanbp + pafi, fun=g) s plot(s, which=1:2, pch=1:2, xlab='$', cex.labels=.7) # Prob 2 spear <- spearman2(totcst ~ age + sex + dzgroup + num.co + scoma + race + meanbp + pafi + alb, p=2) plot(spear) # (d) library(rpart) f <- rpart(is.na(alb) ~ age + sex + dzgroup + scoma + race + meanbp + hrt + temp) plot(f);text(f) levels(dzgroup) na <- naclus(support) plot(na) naplot(na) # Also see na.pattern race <- impute(race) table(race[is.imputed(race)]) table(race) race <- combine.levels(race, minlev=.1) table(race) pafi <- impute(pafi, 333.3) alb <- impute(alb, 3.5) describe(alb) dd <- datadist(support) dd <- datadist(dd, race) options(datadist='dd') dd f <- ols(totcst ~ rcs(age,3)+sex+dzgroup+pol(num.co,2)+ pol(scoma,2)+race+rcs(meanbp,4)+rcs(hrt,4)+ rcs(temp,5)+rcs(alb,5)+rcs(pafi,3)) qqmath(~resid(f)|dzgroup) plot(fitted(f), resid(f)) f <- update(f, log(totcst)~.) f xYplot(resid(f) ~ fitted(f), method='quantile', nx=75) xYplot(resid(f) ~ fitted(f), method=smean.sdl, nx=75) bwplot(dzgroup ~ resid(f), panel=panel.bpplot) a <- anova(f) plot(a, what='partial') plot(a, what='remaining') # Show partial effects of each variable, common scales par(mfrow=c(1,1)) plot(f, ref.zero=T, ylim=c(-1.5,.7)) plot(f, dzgroup=NA, method='dot', fun=exp) plot(f, age=NA) plot(f, age=NA, sex=NA, conf.int=F) plot(f, age=NA, meanbp=NA, method='image') # (k) s <- summary(f, antilog=T) plot(s, main='Cost Ratio', log=T, cex=.75) # (l) nomogram(f, fun=function(x)exp(x)/1000, funlabel='Hospital Cost ($/1000)', cex.var=.6, cex.axis=.5) # Show median and mean predicted cost nomogram(f, fun=list(Mean=function(x)exp(x+.5*f$stats['Sigma']^2)/1000, Median=function(x)exp(x)/1000), fun.at=c(1,5,10,20,50,75,100,150,200), cex.var=.6, cex.axis=.5) g <- Function(f) g exp(g()) sascode(g) specs(f, long=T) names(f) f$assign f$terms f <- update(f, x=T, y=T) dim(f$x) f$x[1:10,] # Problem (m) val <- validate(f, B=100) val f cal <- calibrate(f, B=100) plot(cal, legend=list(x=10,y=9.4)) # Develop menu for predicted values and CL ?Dialog costfit <- ols(log(totcst) ~ dzgroup + rcs(age,4) + race + sex + rcs(meanbp,5) + pol(scoma,2)) hospmortfit <- lrm(hospdead ~ rcs(age,4) + sex + rcs(meanbp,5) + rcs(crea,4)) survivalfit <- cph(Surv(d.time, death) ~ dzgroup*rcs(age,4), surv=T) drep <- dataRep(~ dzgroup + roundN(age,20)) drep surv <- Survival(survivalfit) # Derive function to estimate S(t|Xbeta) surv1 <- function(lp) surv(365, lp) # Derive function to get S(1|Xbeta) surv2 <- function(lp) surv(365*2, lp) # Function to get S(2|Xbeta) survq <- Quantile(survivalfit) # Derive function to get S inverse survmed <- function(lp) survq(.5, lp) # Derive function for S inverse(.5) # At present, Dialog does not support stratification in cph when # predicting survival probabilities or survival times, unless strata # are specified explicitly when surv or survmed are called. surv Dialog(fitPar('costfit', lp=F, conf.type='both', fun=list('Median Cost ($/1000)'=function(y)exp(y)/1000), fun.round=0), fitPar('hospmortfit', lp=F, conf.type='mean', fun=list('Probability of Death in Hospital'=plogis),label=''), fitPar('survivalfit', lp=F, conf.type='none', fun=list('1-Year Survival'=surv1, '2-Year Survival'=surv2, 'Median Survival Time [days]'=survmed), fun.round=c(2,2,0), label='Survival Predictions'), vary=list(race=levels(race)), basename='SUPPORT', header='SUPPORT Predictive Models', prompts=c(scoma='SUPPORT Coma Score', sex='Sex'), defaultAuto='binfactor', limits='data', datarep=drep) runmenu.SUPPORT() ls() CallBack.SUPPORT search() detach(2) # Titanic Case Study data.restore('/tmp/titanic3.sdd') page(describe(titanic3), multi=T) datadensity(titanic3) attach(titanic3[,Cs(pclass,survived,age,sex,sibsp,parch)]) s <- summary(survived ~ age+sex+pclass+ cut2(sibsp,0:3)+cut2(parch,0:3)) s plot(s) plsmo(age,survived,datadensity=T) plsmo(age,survived,group=sex,datadensity=T) plsmo(age,survived,group=pclass,col=1:3,datadensity=T) plsmo(age,survived,group=interaction(sex, pclass),col=1:6,datadensity=T) f1 <- lrm(survived ~ sex*pclass*rcs(age,5)+ rcs(age,5)*(sibsp + parch)) f1 f1$stats page(anova(f1)) f <- lrm(survived ~ (sex + pclass + rcs(age,5))^2 + rcs(age,5)*sibsp) f$stats a <- anova(f) page(a) find(age) objects(1) plot(a) formula(f) f$terms dd <- datadist(titanic3) options(datadist='dd') g <- Function(f) page(g,multi=T) for(sx in levels(sex)) { plot(f, age=NA, pclass=NA, sex=sx,conf.int=F, fun=plogis) title(sx) } f <- update(f, x=T,y=T) validate(f, B=40) cal <- calibrate(f, B=25) plot(cal) naclus <- naclus(titanic3) plot(naclus) library(rpart) who.na <- rpart(is.na(age) ~ sex+pclass+ survived + sibsp + parch, minbucket=15) plot(who.na); text(who.na) m <- lrm(is.na(age) ~ sex*pclass+survived+ sibsp+parch) plot(anova(m)) set.seed(17) xtrans <- transcan(~I(age)+sex+pclass+ sibsp+parch+survived, n.impute=5, pl=F, trantab=T, imputed=T) summary(xtrans) attr(xtrans,"imputed")$age[1:10,] f.mi <- fit.mult.impute(formula(f), lrm, xtrans) #PROBLEM: FROM LAST FIT ONLY: f.mi$stats #Correct: coef(f.mi), f.mi$var # anova, plot, summary, ... all OK page(anova(f.mi), multi=T) plot(f.mi, fun=plogis) # Note CLs are correct for mult imp summary(f.mi, age=c(1,21,30), sex='male') summary(f.mi, age=30) contrast(f.mi, list(age=30,sex='female',pclass='1st',sibsp=2), list(age=10,sex='male',pclass='1st',sibsp=2)) formula(f.mi) detach(2) # Parametric survival analysis of SUPPORT acute <- support$dzclass %in% c('ARF/MOSF','Coma') table(acute) describe(support[acute,]) plot(varclus(~.,data=support[acute,])) dd <- datadist(support[acute,]) options(datadist='dd') attach(support[acute,]) years <- d.time/365.25 units(years) <- 'Year' S <- Surv(years, death) survplot(survfit(S~dzgroup), conf='none', fun=qnorm, logt=T) f <- psm(S ~ dzgroup + rcs(age,5) + rcs(meanbp,5), dist='gaussian') f <- update(f, y=T) S[1:10,] r <- resid(f) r[1:10,] survplot(r) survplot(r, dzgroup, label.curve=F) survplot(r, meanbp, label.curve=F) survplot(r, runif(length(age))) f plot(anova(f)) page(Hazard(f),multi=T) Quantile(f) Mean(f) help(air)