\documentclass{article} % Use the geometry package to set the margins to 1in \usepackage[margin=1.0in]{geometry} % Set the project title and analyst's name \newcommand{\project}{Bosenberg's SED Data} \newcommand{\analyst}{M. S. Shotwell, PhD} \begin{document} % Sweave options \SweaveOpts{concordance=TRUE, keep.source=TRUE} % Tell Sweave to show images at 0.7 times the width of text \setkeys{Gin}{width=0.8\textwidth} % Create a title \begin{center} {\Huge \project} \\\vspace{0.25in} {\large\analyst}\end{center} \begin{figure}[h!] \begin{center} <>= rmvnorm <- function(n, mu, sigma) { p <- length(mu) z <- matrix(rnorm(n * p), n, p) t(mu + t(z %*% chol(sigma))) } resample <- function(df, iid=TRUE) { if(iid==TRUE) { ind <- sample(1:nrow(df), replace=TRUE) } else { ids <- sample(unique(df[[iid]]), replace=TRUE) ind <- unlist(sapply(ids, function(id) which(df[[iid]] == id))) } df[ind,] } # bootstrap the difference in mean absolute prediction error # for the fitted prediction rule versus Bosenbergs simplified rule boot_diff_mean_abs_err <- function(dat, fit, fitB, B=500) { replicate(B, { res <- resample(dat) err <- mean(abs(predict(fit, dat) - res$SED)) errB <- mean(abs(predict(fitB, dat) - res$SED)) errB - err }) } @ <>= # download and read SED data SEData <- read.csv(paste("http://biostat.mc.vanderbilt.edu/", "wiki/pub/Main/ReproducibleResearchTutorial/", "Bosenberg1995.csv", sep="")) # regress SED onto Weight SEDfit <- lm(SED ~ WT, data=SEData) # format the intercept (for later use with \Sexpr) SEDcoe <- format(coef(SEDfit), digits=2, nsmall=2) # fit Bosenberg's simplified prediction rule (for use with anova) SEDfitB <- lm(SED ~ 0 + offset(WT), data=SEData) # ANOVA for fitted rule versus simplified rule SEDanova <- anova(SEDfit, SEDfitB) # convenience values rWT <- range(SEData$WT) rSED <- range(SEData$SED) SEDa <- coef(SEDfit)[1] SEDb <- coef(SEDfit)[2] pWT <- data.frame(WT=seq(rWT[1], rWT[2], length.out=100)) # generate confidence and prediction bands SEDconf <- predict(SEDfit, pWT, interval="confidence") SEDpred <- predict(SEDfit, pWT, interval="prediction") # create a scatterplot plot(SEData$WT, SEData$SED, xlim=rWT, ylim=range(c(SEDa + SEDb * rWT, rWT, SEData$SED, SEDconf, SEDpred)), xlab="Weight (kg)", ylab="Skin to Epidural Distance (mm)") # add regression line lines(rWT, SEDa + SEDb * rWT) lines(pWT$WT, SEDconf[,2], lty=2) lines(pWT$WT, SEDconf[,3], lty=2) lines(pWT$WT, SEDpred[,2], lty=3) lines(pWT$WT, SEDpred[,3], lty=3) # add Bosenberg's simplified prediction rule lines(rWT, rWT, col="gray") # bootstrap the difference in mean absolute prediction error SEDerr <- format(quantile( boot_diff_mean_abs_err(SEData, SEDfit, SEDfitB), probs=c(0.5, 0.025, 0.975)), digits=2, nsmall=2) @ \caption{Scatterplot of weight versus skin-to-epidural distance (SED) for \Sexpr{nrow(SEData)} subjects. The best-fit line is overlaid in black. The estimated intercept and slope are \Sexpr{SEDcoe[1]}~{\small mm} and \Sexpr{SEDcoe[2]}~{\small mm/kg}, respectively. Dotted lines represent a 95\% pointwise prediction band. Dashed lines represent a 95\% pointwise confidence band for the fitted rule. The gray line represents Bosenberg's simplified prediction rule (1~{\small mm/kg}). Bosenberg (1995) argued that the simplified rule was valid because it fell within a 95\% prediction band (Bosenberg incorrectly uses the term ``confidence band'') for the fitted rule. However, a simultaneous test of the null hypothesis that the intercept is zero and the slope is one was significant (p-value = \Sexpr{format(SEDanova$P[2], nsmall=2, digits=2)}). Hence, there is strong evidence that the simplified rule did not give rise to the observed data. In addition, the mean absolute prediction error for the simplified prediction rule was \Sexpr{SEDerr[1]} {\small mm} (95\% CI: \Sexpr{SEDerr[2]}, \Sexpr{SEDerr[3]}) greater than for the fitted rule. \label{fig:bosenberg}} \end{center} \end{figure} \newpage \noindent Code listing for the Figure \ref{fig:bosenberg}: <>= <> @ \end{document}