Analyses using the diabetes dataset (courtesy John Schorling, UVa)
[For demo, get data from /mshes/students/fharrell]

Understanding data - univariable looks
  library(Hmisc, T)
  datadensity(diabetes)
  hist.data.frame(diabetes)

  # Stratifies on gender (why not stratify on sex?)
  ecdf(diabetes, group=diabetes$gender, col=1:2, label.curve=F)

Examine missing data
  nac <- naclus(diabetes)
  plot(nac)
  naplot(nac)

Relationships among variables
  v <- varclus(~., data=diabetes)

Use recursive partitioning to impute body frame
(rpart from Atkinson & Therneau, Mayo Clinic)

  library(rpart)
  attach(diabetes)
  f <- rpart(frame ~ age + gender + height + weight + waist + hip)
  plot(f); text(f)

Once variables to be forced into a predictive model are decided upon,
rank them in the predictive potential to guide spending of d.f.

  r <- spearman2(glyhb ~ age + gender + height + weight + frame +
                 hdl + ratio + bp.1s + bp.1d)
  plot(f)

------------------------------------------------------------------------
Analysis using the titanic3 dataset

Univariable descriptive statistics

  page(describe(titanic3))
  datadensity(titanic3)

Descriptive statistics, 2-way stratification

  attach(titanic3)
  bwplot(sex ~ age | pclass, panel=panel.bpplot)

Look at missing data

  nac <- naclus(titanic3)
  plot(nac)
  f <- rpart(is.na(age) ~ sex + pclas + survived)
  plot(f); text(f)

Describe tendencies of response variable with separate stratification
by a series of predictors

  s <- summary(survived ~ age + sex + pclass + embarked + pmin(parch,3))
  options(digits=3)
  s
  plot(s)

Nonparametric regression, treating age as continuous

  plsmo(age, survived, datadensity=T)
  plsmo(age, survived, group=sex, col=1:2, datadensity=T)
  plsmo(age, survived, group=pclass, col=1:3, datadensity=T)
  plsmo(age, survived, group=interaction(sex,pclass),
        col=1:6, datadensity=T)

Logistic models

  library(Design,T)
  dd <- datadist(titanic3)
  options(datadist='dd')
  f <- lrm(survived ~ rcs(age,5)*sex+pclass, x=T, y=T)
  f
  plot(f)
  plot(f, age=NA, sex=NA, conf.int=F)
  anova(f)
  plot(anova(f))
  plot(summary(f), log=T)

Validate model for statistical indexes

  validate(f, B=10)  # usually use B=150

Validate model for calibration

  cal <- calibrate(f, B=10)
  plot(cal)

Make nomogram for obtaining predictions

  nomogram(f, fun=plogis, funlabel='Prob[survive]')

Make an S+ function for deriving predictions

  g <- Function(f)
  plogis(g(30,c('female''male'),'2nd'))

Make a dialog for obtaining predictions

  drep <- dataRep(~roundN(age,10)+sex+pclass)
  Dialog(fitPar('f', lp=F, fun=list('Pr[Survived]'=plogis)),
         basename='Titanic', datarep=drep)
  runmenu.Titanic()

Generate multiple imputations for age given survived, sex, pclass

  titanic.trans <- transcan(~survived + sex + pclass + age,
                            transformed=F, trantab=T, n.impute=10,
                            imputed=T)

Use these to fit 10 models on 10 completed datasets, average betas,
correct variances for multiple imputation

  fmi <- fit.mult.impute(formula(f), lrm, titanic.trans)
  plot(anova(fmi))