store() attach(support[!is.na(support$totcst) & support$totcst>0 & !is.na(support$race),]) race2 <- race levels(race2) <- list(other=c('asian','other')) table(race2) f <- ols(log(totcst) ~ rcs(age,4) + dzgroup + race2) an <- anova(f) library(display) w <- latex(an, fi='anova.tex') dd <- datadist(age, dzgroup, race2) options(datadist='dd') setps(costnomo, h=6, pointsize=14) nomogram(f, age=c(10,20,30,40,60,80,90,100), lp=F, fun=list('$ / 1000'=function(y)exp(y)/1000), fun.lp.at=log(1000*c(1,5,10,15,20,25,30,40,50,60,70)), force.label=T) dev.off() p <- predict(f, expand.grid(age=50, dzgroup='Coma', race2=levels(race2)), se.fit=T) options(digits=3) data.frame(race=levels(race2), p) contrast(f, list(age=50, dzgroup='Coma', race2=levels(race2)), type='average', weights=table(race2)) contrast(f, list(age=50, dzgroup='Coma', race2=levels(race2)), type='average') g <- ols(log(totcst) ~ rcs(age,4) + dzgroup) predict(g, data.frame(age=50, dzgroup='Coma'), se.fit=T) yhat <- predict(f) h <- ols(yhat ~ rcs(age,4) + dzgroup + race2, sigma=1) fastbw(h, aics=1000) w <- function(x,y){ s <- !is.na(x+y) x <- x[s]; y <- y[s] sse <- sum((x-y)^2) sst <- sum((y-mean(y))^2) r2 <- 1 - sse/sst er <- quantile(abs(x-y),.95,na.rm=T) names(er) <- NULL c(n=length(x),R2=r2, Error95=er) } h1 <- ols(yhat ~ age + dzgroup) abs.error.pred(lp=predict(h1), y=yhat) # med abs error .07 R2=.979 w(predict(h1), yhat) # .22 h2 <- ols(yhat ~ rcs(age,4) + dzgroup) abs.error.pred(lp=predict(h2), y=yhat) # med abs error .02 R2=.991 w(predict(h2), yhat) # .16 r <- ols(log(totcst) ~ rcs(age, 4) + dzgroup) cbind(h2$coef, r$coef) w(predict(r), log(totcst)) w(predict(h2),log(totcst)) r2 <- ols(log(totcst) ~ dzgroup) h3 <- ols(yhat ~ dzgroup) cbind(coef(r2),coef(h3)) ##---------------------------------------------------------------------- store() options(digits=5) n <- 500 rho <- .2 true <- c(seq(.1,1.3,by=.1),rep(.25,7)) true <- c(seq(.1,1.3,by=.1),rep(.6,7)) true <- c(seq(.1,1.3,by=.1),rep(.35,7)) true <- c(seq(.1,.9,by=.1),rep(.1,6),rep(0,5)) true <- c(seq(.1,1.3,by=.1),rep(.15,7)) sigma <- 9 nsim <- 25 nrep <- 3 mvrnorm <- function(n, p = 1, u = rep(0, p), S = diag(p)) { Z <- matrix(rnorm(n * p), p, n) t(u + t(chol(S)) %*% Z) } if(T) { X <- mvrnorm(n, p=20, S=diag(rep(1-rho,20))+rho) xbtrue <- matxv(X, true) for(j in 1:3) { y <- xbtrue + sigma*rnorm(n) g <- ols(y ~ X, x=T, y=T) print(g$stats) print(pentrace(g, seq(0,1000,by=100))$penalty) } } val <- function(fit) { yhat <- predict(fit) a <- sqrt(mean((xbtrue - yhat)^2)) yhat <- predict(fit, dv) b <- sqrt(mean((XvB - yhat)^2)) c(a,b) } find.approx <- function(fit, r2=.95) { lp <- predict(fit) form <- update(formula(fit), lp~.) f <- ols(form, sigma=1) b <- fastbw(f, aics=1e7) r <- b$result nr <- nrow(r) j <- max((1:nr)[r[,'R2'] >= r2]) factors <- dimnames(r)[[1]][j:nr] form <- as.formula(paste('lp ~',paste(factors,collapse='+'))) update(f, form) } bw <- function(f, sls=.05, type=c('individual','residual')) { type <- match.arg(type) b <- fastbw(f, type=type, sls=sls, rule='p') factors <- b$factors.kept yhat <- matxv(X[,factors,drop=F], b$coefficients) a <- sqrt(mean((xbtrue - yhat)^2)) yhat <- matxv(Xv[,factors,drop=F], b$coefficients) b <- sqrt(mean((XvB - yhat)^2)) c(a,b) } prn(true);prn(sigma);prn(nsim) Xv <- mvrnorm(10000, p=20, S=diag(rep(1-rho,20))+rho) XvB <- matxv(Xv, true) dv <- as.data.frame(Xv) xn <- paste('x',1:20,sep='') names(dv) <- xn res <- array(NA, dim=c(nrep,nsim,7,2), dimnames=list( paste('Rep',1:nrep), paste('Simulation',1:nsim), c('full','bw .05','bw .5','bw res .05', 'approx','full pen','approx pen'), c('Sample 1','Sample 2'))) comment(res) <- list(true=true, n=n, rho=rho, sigma=sigma) for(irep in 1:nrep) { prn(irep) X <- mvrnorm(n, p=20, S=diag(rep(1-rho,20))+rho) dimnames(X) <- list(NULL,xn) uncbind(X) xbtrue <- matxv(X, true) for(i in 1:nsim) { cat(i,'') y <- xbtrue + sigma*rnorm(n) f <- ols(y ~ x1+x2+x3+x4+x5+x6+x7+x8+x9+x10+x11+x12+x13+x14+x15+ x16+x17+x18+x19+x20) res[irep,i,1,] <- val(f) res[irep,i,2,] <- bw(f) res[irep,i,3,] <- bw(f, sls=.5) res[irep,i,4,] <- bw(f, sls=.5, type='res') f.approx <- find.approx(f) res[irep,i,5,] <- val(f.approx) a <- update(f, x=T, y=T) ap <- pentrace(a, seq(50,600,by=50)) prn(ap$penalty) f.pen <- update(f, penalty=ap$penalty) res[irep,i,6,] <- val(f.pen) f.pen.approx <- find.approx(f.pen) res[irep, i, 7,] <- val(f.pen.approx) } } storage.mode(res) <- 'single' result3 <- res store(result3,where='_Data') r <- range(res, na.rm=T) for(i in 1:7) { ti <- dimnames(res)[[3]][i] plot(res[1,,i,1],res[1,,i,2],xlab='Sample 1',ylab='Sample 2', xlim=r, ylim=r) abline(a=0,b=1) points(res[2,,i,1],res[2,,i,2], pch=3) points(res[3,,i,1],res[3,,i,2], pch=4) title(ti) ## boxplot(list(Rep1=res[1,,i,2],Rep2=res[2,,i,2],Rep3=res[3,,i,2])) ## title(ti) } for(w in list(result1,result3,result2)) { print(round(a <- apply(w[,,,1], 3, mean),2)) print(round(b <- apply(w[,,,2], 3, mean),2)) print(spearman(a,b)) } ## Function to reshape 3-dimensional array into a data frame res <- function(x) { y <- as.vector(x) d <- dimnames(x) data.frame(Rep=d[[1]][row(x)], Simulation=d[[2]][col(x)], Strategy=d[[3]][slice.index(x,3)], RMSE=y) } res(result1[,,,1]) z <- rbind(res(result1[,,,1]),res(result3[,,,1]),res(result2[,,,1])) z$betas <- c(rep('Beta 1',525),rep('Beta 2',525),rep('Beta 3',525)) z$Strategy <- ordered(z$Strategy, levels(z$Strategy)[rev(c(6,3,4,5,1,7,2))]) setps(simresults, trellis=T, h=10, pointsize=16) bwplot(Strategy ~ RMSE | betas, data=z, panel=panel.bpplot) dev.off() apply(res[,,,2], 3, mean) apply(res[,,,1], 3, median) apply(res[,,,1], 3, function(y)sqrt(var(as.vector(y))))