######################################### # Day 1 of Regression Modeling Strategies ######################################### # Assignment 3 library(Hmisc,T) library(Design,T) data.restore('/tmp/support.sdd') support <- cleanup.import(support) page(contents(support),multi=T) page(describe(support),multi=T) library(rpart) datadensity(support) hist.data.frame(support) ecdf(support, datadensity='hist',side=3) n <- naclus(support) plot(n) naplot(n) sub <- subset(support, is.na(totcst) | totcst>0) dim(sub) w <- is.na(totcst) ~ age + sex + dzgroup + scoma + meanbp + hospdead + charges s <- summary(w, data=sub) options(digits=3) s page(s) plot(s) f <- rpart(w, data=sub) plot(f);text(f) f$cptable sum(sub$age > 24.53 & sub$age < 26.2) # Estimate costs from charges (previous regression) sub <- upData(sub, totcst.i=ifelse(is.na(totcst), exp(.2550+.9244*log(charges)), totcst), labels=c(totcst.i='Total Cost Imputed from charges') ) page(contents(sub),multi=T) # Assignment 4 Prob. 1 g <- function(y) c(Mean=mean(y), Median=median(y)) s <- summary(totcst.i ~ age+sex+dzgroup+num.co+scoma+ race+meanbp, data=sub, fun=g) page(s) plot(s, which=1:2, pch=1:2, xlab='$') # Use show.pch() to see pch defns w <- spearman2(totcst.i ~ age+sex+dzgroup+num.co+scoma+ race+meanbp,data=sub,p=2) w plot(w) tapply(sub$totcst.i, sub$dzgroup, median, na.rm=T) ########################## # Day 2 ########################## library(Hmisc,T) library(Design,T) sample(c(10,11,13,17), 4, T) x <- 1:20 smean.cl.boot(x) # in Hmisc library smean.cl.normal(x) # Assignment 5: RMS Chp 7 Prob 2(a) v <- varclus(~., data=sub) plot(v) # (b) s <- spearman2(totcst.i ~ age+sex+dzgroup+ num.co+scoma+race+meanbp+hrt+temp+pafi+alb, data=sub) plot(s) # (c) done on Day 1 # (d) library(rpart) f <- rpart(is.na(pafi) ~ ., data=sub) plot(f);text(f) levels(sub$dzgroup) print(paste(letters[1:8], levels(sub$dzgroup),sep=':'),quote=F) describe(sub$race) # (2)e-f sub$race2 <- combine.levels(as.character( impute(sub$race)), minlev=.1) sub$pafi <- oldUnclass(impute(sub$pafi, 333.33)) sub$alb <- oldUnclass(impute(sub$alb, 3.5)) describe(sub[c('race2','pafi','alb')]) class(sub$pafi) attributes(sub$pafi) dd <- datadist(sub) options(datadist='dd') f <- ols(totcst ~ rcs(age,3)+sex+dzgroup+ pol(num.co,2)+pol(scoma,2)+race2+rcs(meanbp,4)+ rcs(hrt,4)+rcs(temp,5)+rcs(alb,5)+rcs(pafi,3), data=sub) qqnorm(resid(f)) # terrible non-normality! f <- update(f, log(totcst) ~ .) r <- resid(f) qqnorm(r) # almost a straight line qqline(r) bwplot(dzgroup ~ r, data=sub) xYplot(r ~ meanbp, method='quantile', data=sub,nx=100) table(sub$num.co);table(sub$scoma) a <- anova(f) page(a) plot(a,what='partial R2') par(mfrow=c(3,4)) plot(f, ref.zero=T, ylim=c(-1.5,.7)) par(mfrow=c(1,1)) s <- summary(f, antilog=T) plot(s, main='Cost Ratio', log=T) nomogram(f, fun=function(x)exp(x)/1000, funlabel='Median Cost ($/1000)', cex.var=.6, cex.axis=.5) Dialog(fitPar('f', lp=F, fun=list('Median Cost ($/1000)'=function(y)exp(y)/1000), fun.round=0)) runmenu.f() f <- update(f, x=T, y=T) f$x[1:5,] v <- validate(f, B=50) page(v) page(f) cal <- calibrate(f, B=50) plot(cal) g <- Function(f) page(g) exp(g(age=c(30,50,70, 90),sex='female')) ################################# # Day 3 ################################# library(Hmisc,T) library(Design,T) ?Design.trans ?scored ?Overview data.restore('/tmp/titanic3.sdd') titanic3 <- cleanup.import(titanic3) page(describe(titanic3),multi=T) dd <- datadist(titanic3) options(datadist='dd') options(digits=2) attach(titanic3) rm(sex) s <- summary(survived ~ age+sex+pclass+sibsp+parch) plot(s) plsmo(age, survived, datadensity=T) plsmo(age, survived, group=sex, col=1:2, datadensity=T) plsmo(age, survived, group=pclass, col=1:3, datadensity=T) plsmo(age, survived, group=interaction(pclass,sex),, col=1:6, datadensity=T) plsmo(age, survived, group=interaction(pclass, cut2(fare,g=3)), datadensity=T, col=1:9) v <- varclus(~age+sex+pclass+fare) plot(v) bwplot(pclass ~ fare, panel=panel.bpplot) w <- pclass=='1st' table(w) plsmo(age[w], survived[w], group=cut2(fare[w],g=4)) plsmo(fare[w], survived[w], group=cut2(age[w],g=4),datadensity=T) ecdf(fare) library(rpart) r <- rpart(fare ~ sex+age+pclass) plot(r);text(r) r <- rpart(survived ~ sex+age+ pclass+fare) plot(r);text(r) f1 <- lrm(survived ~ sex*pclass* rcs(age,5)+rcs(age,5)*(sibsp+parch)) page(anova(f1)) f <- lrm(survived ~ (sex + pclass + rcs(age,5))^2 + rcs(age,5)*sibsp) page(f) a <- anova(f) page(a); plot(a) print(a, 'sub') for(sx in levels(sex)) { plot(f, age=NA, pclass=NA, sex=sx) title(sx) } plot(f) # somewhat arbitrary b/c interactions f <- update(f, x=T,y=T) v <- validate(f, B=30) page(v) cal <- calibrate(f, B=10) plot(cal) f$stats (554-26)/554 who.na <- rpart(is.na(age) ~ sex+pclass+survived+sibsp+parch, minbucket=15) plot(who.na);text(who.na) m <- lrm(is.na(age) ~ sex*pclass + survived + sibsp + parch + rcs(fare,4)) plot(anova(m)) plot(m, fare=NA) xYplot(age ~ fare | pclass*sex) spearman2(fare ~ age) set.seed(17) xtrans <- aregImpute( ~ age + sex + pclass + sibsp + parch + survived, n.impute=5) page(xtrans) plot(xtrans) traceback() ?aregImpute .R.; .SV4. f.mi <- fit.mult.impute(formula(f), lrm, xtrans, data=titanic3) page(anova(f.mi)) plot(f.mi, age=NA, pclass=NA) s <- summary(f.mi, age=c(1,30), sibsp=0:1) plot(s,log=T) g <- Function(f.mi) page(g) page(sascode(g)) plogis(g(age=c(2,21,50),sex='male', pclass='3rd')) drep <- dataRep(~roundN(age,10)+ sex+pclass+roundN(sibsp,clip=0:1)) page(drep) Dialog(fitPar('f.mi',lp=F, fun=list('Prob[Survival]'=plogis)), limits='data', basename='Titanic', vary=list(sex=levels(sex)), datarep=drep) runmenu.Titanic() data.restore('/tmp/diabetes.sdd') diabetes <- cleanup.import(diabetes) plot(v <- varclus(~.,data=diabetes)) search()[1:5] page(v) detach(2) attach(diabetes) r <- rpart(frame ~ gender+height+ weight+waist+hip) probs <- predict(r, diabetes) mpc <- (t(apply(-probs,1,order)))[,1] frame.pred <- levels(frame)[mpc] table(frame,frame.pred) fr <- ifelse(is.na(frame), as.character(frame.pred), as.character(frame)) table(fr) frame <- fr na <- naclus(diabetes) naplot(na) bp.1s <- impute(bp.1s) chol <- impute(chol) weight <- impute(weight) hip <- impute(hip) set.seed(721) f <- areg.boot(glyhb ~ monotone(age) + bp.1s + chol + frame + monotone(weight) + monotone(hip),B=60) page(f) plot(f) plot(fitted(f), resid(f)) qqnorm(resid(f)) page(f) funs <- Function(f) names(funs) page(funs$glyhb) plot(glyhb, funs$glyhb(glyhb)) plot(1/glyhb, funs$glyhb(glyhb)) s <- summary(f, values=list( age=c(20,30,40,50,60,70,80)))