% Extended Sweave example: LaTeX text interleaved with R code chunks.
% Process with Sweave first, then compile the resulting .tex file with LaTeX.
% Sweave syntax reminders:
%   - a code chunk opens with a header of the form <<options>>= and closes with @,
%     each at the very start of its own line;
%   - \SweaveOpts must also start its own line.
\documentclass[12pt]{article}
\usepackage[margin=1.0in]{geometry}

\title{Extended Sweave Example}
\author{Theresa A Scott, MS}
\date{ }

\SweaveOpts{prefix.string=graphics/plot} % Created a "graphics" subdirectory to save graph files in

\begin{document}
\maketitle

% Hidden R code chunk --- reading in the data
% (echo=FALSE hides the code; results=hide suppresses the printed names)
<<echo=FALSE, results=hide>>=
library(xtable)
data(mtcars)
names(mtcars)
@

\section{Analysis \& Results}

The \texttt{mtcars} (`Motor Trend Car Road Tests') data set is comprised of
\Sexpr{ncol(mtcars)} aspects of automobile design and performance (columns) for
\Sexpr{nrow(mtcars)} automobiles (rows).
We wish to know if there is a significant difference in the quarter mile track times
(\texttt{qsec}) between the different cylinder classes (\texttt{cyl}; 4, 6, and 8).\\

\noindent Let's first examine the quantiles (minimum, first quartile, median, third quartile,
and maximum) of the times within each cylinder class.
These values are given in Table~\ref{table:quantiles}.

% R code chunk that generates LaTeX code to create table
% (results=tex passes the xtable output straight through to LaTeX)
<<echo=FALSE, results=tex>>=
# One row per cylinder class; each column is a summary of qsec within that class.
tabledf <- data.frame(
    "N.cylinders" = sort(unique(mtcars$cyl)),
    "Min" = with(mtcars, tapply(X = qsec, INDEX = list(cyl), FUN = min, na.rm = TRUE)),
    "Q1" = with(mtcars, tapply(X = qsec, INDEX = list(cyl), FUN = quantile,
        probs = 0.25, na.rm = TRUE)),
    "Median" = with(mtcars, tapply(X = qsec, INDEX = list(cyl), FUN = median, na.rm = TRUE)),
    "Q3" = with(mtcars, tapply(X = qsec, INDEX = list(cyl), FUN = quantile,
        probs = 0.75, na.rm = TRUE)),
    "Max" = with(mtcars, tapply(X = qsec, INDEX = list(cyl), FUN = max, na.rm = TRUE)))
print(xtable(tabledf,
        caption = "Quantiles of the quarter mile track times within each cylinder class",
        label = "table:quantiles"),
    caption.placement = "top", include.rownames = FALSE)
@

\noindent Based on a non-parametric Kruskal-Wallis test, we see that the times are
significantly different from each other:

% Visible R code chunk --- both the code and its output appear in the document
<<>>=
with(mtcars, kruskal.test(qsec ~ cyl))
@

% Hidden R code chunk that assigns objects that contain calculated values to be used in
% the inline Sexpr statements in the following paragraph
% NOTE: The objects are assigned in this code chunk because the R code in the inline
% Sexpr statements would otherwise break over multiple lines.
<<echo=FALSE>>=
# Number of cars in each cylinder class
n4cyl <- nrow(subset(mtcars, cyl == 4))
n6cyl <- nrow(subset(mtcars, cyl == 6))
n8cyl <- nrow(subset(mtcars, cyl == 8))
# Percentage of all cars in each class, formatted with exactly one decimal place
prop4cyl <- format(round(n4cyl / nrow(mtcars) * 100, 1), nsmall = 1)
prop6cyl <- format(round(n6cyl / nrow(mtcars) * 100, 1), nsmall = 1)
prop8cyl <- format(round(n8cyl / nrow(mtcars) * 100, 1), nsmall = 1)
@

\noindent Lastly, Figure~\ref{figure:boxstrip} graphically displays the distribution of the
times within each cylinder class -- the raw values (shown using a `stripchart') are overlaid
with side-by-side boxplots.
The width of each box is also proportional to the number of cars within each cylinder group --
\Sexpr{prop4cyl}\% (\Sexpr{n4cyl}) 4 cylinder cars;
\Sexpr{prop6cyl}\% (\Sexpr{n6cyl}) 6 cylinder cars; and
\Sexpr{prop8cyl}\% (\Sexpr{n8cyl}) 8 cylinder cars.

% First a hidden R code chunk that adds some labels to certain variables --- to be
% referenced in plot
<<echo=FALSE, results=hide>>=
library(Hmisc)
label(mtcars$qsec) <- "1/4 mile track time (sec)"
label(mtcars$cyl) <- "No. of cylinders"
@

% Additional LaTeX code to add caption to figure
\begin{figure}
\caption{Distribution of 1/4 mile track time (sec) within each cylinder class.}
\label{figure:boxstrip} % \label must follow \caption so that \ref yields the figure number
\centering
\setkeys{Gin}{width=0.75\textwidth} % LaTeX code to include the graphic at 75% of the text width
% R code chunk that produces a graphic
<<fig=TRUE, echo=FALSE>>=
with(mtcars, {
    # outpch = NA hides the boxplot's own outlier symbols: every raw value is
    # drawn by the stripchart overlay below.
    boxplot(qsec ~ cyl, outpch = NA, varwidth = TRUE,
        xlab = label(cyl), ylab = label(qsec),
        main = paste("Boxplot of", label(qsec), "by\n", label(cyl)))
    stripchart(qsec ~ cyl, method = "jitter", pch = 1, vertical = TRUE, add = TRUE)
})
box("figure") # Adds box around figure
@
\end{figure}

\end{document}