store()
attach(support[!is.na(support$totcst) & support$totcst>0 &
               !is.na(support$race),])

race2 <- race
levels(race2) <- list(other=c('asian','other'))
table(race2)

f <- ols(log(totcst) ~ rcs(age,4) + dzgroup + race2)
an <- anova(f)
library(display)
w <- latex(an, fi='anova.tex')

dd <- datadist(age, dzgroup, race2)
options(datadist='dd')
setps(costnomo, h=6, pointsize=14)
nomogram(f, age=c(10,20,30,40,60,80,90,100),
         lp=F, fun=list('$ / 1000'=function(y)exp(y)/1000),
         fun.lp.at=log(1000*c(1,5,10,15,20,25,30,40,50,60,70)),
         force.label=T)
dev.off()

p <- predict(f, expand.grid(age=50, dzgroup='Coma', race2=levels(race2)),
             se.fit=T)
options(digits=3)
data.frame(race=levels(race2), p)

contrast(f, list(age=50, dzgroup='Coma', race2=levels(race2)),
         type='average', weights=table(race2))

contrast(f, list(age=50, dzgroup='Coma', race2=levels(race2)),
         type='average')

g <- ols(log(totcst) ~ rcs(age,4) + dzgroup)
predict(g, data.frame(age=50, dzgroup='Coma'), se.fit=T)

yhat <- predict(f)

h <- ols(yhat ~ rcs(age,4) + dzgroup + race2, sigma=1)
fastbw(h, aics=1000)

w <- function(x,y){
  s <- !is.na(x+y)
  x <- x[s]; y <- y[s]
  sse <- sum((x-y)^2)
  sst <- sum((y-mean(y))^2)
  r2 <- 1 - sse/sst
  er <- quantile(abs(x-y),.95,na.rm=T)
  names(er) <- NULL
  c(n=length(x),R2=r2, Error95=er)
}

h1 <- ols(yhat ~ age + dzgroup)
abs.error.pred(lp=predict(h1), y=yhat) # med abs error .07 R2=.979
w(predict(h1), yhat) # .22
h2 <- ols(yhat ~ rcs(age,4) + dzgroup)
abs.error.pred(lp=predict(h2), y=yhat) # med abs error .02 R2=.991
w(predict(h2), yhat) # .16

r <- ols(log(totcst) ~ rcs(age, 4) + dzgroup)
cbind(h2$coef, r$coef)
w(predict(r), log(totcst))
w(predict(h2),log(totcst))

r2 <- ols(log(totcst) ~ dzgroup)
h3 <- ols(yhat ~ dzgroup)
cbind(coef(r2),coef(h3))


##----------------------------------------------------------------------
store()
options(digits=5)
n <- 500
rho <- .2
true <- c(seq(.1,1.3,by=.1),rep(.25,7))
true <- c(seq(.1,1.3,by=.1),rep(.6,7))

true <- c(seq(.1,1.3,by=.1),rep(.35,7))
true <- c(seq(.1,.9,by=.1),rep(.1,6),rep(0,5))
true <- c(seq(.1,1.3,by=.1),rep(.15,7))
sigma <- 9
nsim <- 25
nrep <- 3

mvrnorm <- function(n, p = 1, u = rep(0, p), S = diag(p)) {
  Z <- matrix(rnorm(n * p), p, n)
  t(u + t(chol(S)) %*% Z)
}

if(T) {
  X <- mvrnorm(n, p=20, S=diag(rep(1-rho,20))+rho)
  xbtrue <- matxv(X, true)
  for(j in 1:3) {
    y <- xbtrue + sigma*rnorm(n)
    g <- ols(y ~ X, x=T, y=T)
    print(g$stats)
    print(pentrace(g, seq(0,1000,by=100))$penalty)
  }
}


val <- function(fit) {
  yhat <- predict(fit)
  a <- sqrt(mean((xbtrue - yhat)^2))
  yhat <- predict(fit, dv)
  b <- sqrt(mean((XvB - yhat)^2))
  c(a,b)
}

find.approx <- function(fit, r2=.95) {
  lp <- predict(fit)
  form <- update(formula(fit), lp~.)
  f <- ols(form, sigma=1)
  b <- fastbw(f, aics=1e7)
  r <- b$result
  nr <- nrow(r)
  j <- max((1:nr)[r[,'R2'] >= r2])
  factors <- dimnames(r)[[1]][j:nr]
  form <- as.formula(paste('lp ~',paste(factors,collapse='+')))
  update(f, form)
}

bw <- function(f, sls=.05, type=c('individual','residual')) {
  type <- match.arg(type)
  b <- fastbw(f, type=type, sls=sls, rule='p')
  factors <- b$factors.kept
  yhat <- matxv(X[,factors,drop=F], b$coefficients)
  a <- sqrt(mean((xbtrue - yhat)^2))
  yhat <- matxv(Xv[,factors,drop=F], b$coefficients)
  b <- sqrt(mean((XvB - yhat)^2))
  c(a,b)
}

prn(true);prn(sigma);prn(nsim)

Xv  <- mvrnorm(10000, p=20, S=diag(rep(1-rho,20))+rho)
XvB <- matxv(Xv, true)
dv  <- as.data.frame(Xv)
xn  <- paste('x',1:20,sep='')
names(dv) <- xn

res <- array(NA, dim=c(nrep,nsim,7,2),
             dimnames=list(
               paste('Rep',1:nrep),
               paste('Simulation',1:nsim),
               c('full','bw .05','bw .5','bw res .05',
                 'approx','full pen','approx pen'),
               c('Sample 1','Sample 2')))
comment(res) <- list(true=true, n=n, rho=rho, sigma=sigma)

for(irep in 1:nrep) {
  prn(irep)
  X <- mvrnorm(n, p=20, S=diag(rep(1-rho,20))+rho)
  dimnames(X) <- list(NULL,xn)
  uncbind(X)
  
  xbtrue <- matxv(X, true)


  for(i in 1:nsim) {
    cat(i,'')

    y <- xbtrue + sigma*rnorm(n)
    f <- ols(y ~ x1+x2+x3+x4+x5+x6+x7+x8+x9+x10+x11+x12+x13+x14+x15+
             x16+x17+x18+x19+x20)

    res[irep,i,1,] <- val(f)
    res[irep,i,2,] <- bw(f)
    res[irep,i,3,] <- bw(f, sls=.5)
    res[irep,i,4,] <- bw(f, sls=.5, type='res')
    
    f.approx <- find.approx(f)
    res[irep,i,5,] <- val(f.approx)
  
    a <- update(f, x=T, y=T)
    ap <- pentrace(a, seq(50,600,by=50))
    prn(ap$penalty)
    f.pen <- update(f, penalty=ap$penalty)
    res[irep,i,6,] <- val(f.pen)
  
    f.pen.approx <- find.approx(f.pen)
    res[irep, i, 7,] <- val(f.pen.approx)
  }
}
storage.mode(res) <- 'single'
result3 <- res
store(result3,where='_Data')

r <- range(res, na.rm=T)
for(i in 1:7) {
  ti <- dimnames(res)[[3]][i]
  plot(res[1,,i,1],res[1,,i,2],xlab='Sample 1',ylab='Sample 2',
       xlim=r, ylim=r)
  abline(a=0,b=1)
  points(res[2,,i,1],res[2,,i,2], pch=3)
  points(res[3,,i,1],res[3,,i,2], pch=4)
  title(ti)
  ##  boxplot(list(Rep1=res[1,,i,2],Rep2=res[2,,i,2],Rep3=res[3,,i,2]))
  ##  title(ti)
}

for(w in list(result1,result3,result2)) {
  print(round(a <- apply(w[,,,1], 3, mean),2))
  print(round(b <- apply(w[,,,2], 3, mean),2))
  print(spearman(a,b))
}

## Function to reshape 3-dimensional array into a data frame
res <- function(x) {
  y <- as.vector(x)
  d <- dimnames(x)
  data.frame(Rep=d[[1]][row(x)], Simulation=d[[2]][col(x)],
             Strategy=d[[3]][slice.index(x,3)], RMSE=y)
}

res(result1[,,,1])
z <- rbind(res(result1[,,,1]),res(result3[,,,1]),res(result2[,,,1]))
z$betas <- c(rep('Beta 1',525),rep('Beta 2',525),rep('Beta 3',525))
z$Strategy <- ordered(z$Strategy,
                      levels(z$Strategy)[rev(c(6,3,4,5,1,7,2))])
setps(simresults, trellis=T, h=10, pointsize=16)
bwplot(Strategy ~ RMSE | betas, data=z, panel=panel.bpplot)
dev.off()

apply(res[,,,2], 3, mean)
apply(res[,,,1], 3, median)
apply(res[,,,1], 3, function(y)sqrt(var(as.vector(y))))