# Initial dataset manipulations p <- read.table("http://biostat.mc.vanderbilt.edu/wiki/pub/Main/CourseBios312/students.csv", sep=",", header=TRUE) p$total <- p$daysabs + p$daysatt p$male.factor <- factor(p$male, levels=0:1, labels=c("Female", "Male")) names(p) # The libraries that I will use # If you have never used one of these libraries, install first library(rms) library(sandwich) library(lmtest) library(lattice) # Summary statistics summary(~daysabs + total + math + langarts + male, data=p, method="reverse") mean(p$daysabs) sd(p$daysabs) summary(male.factor~daysabs + total + math + langarts, data=p, method="reverse") #Histogram with(p, hist(daysabs, breaks=150, xlim=c(0,50), col="gray"), xlab="Days Absent") # Histogram using the lattice library histogram(~ daysabs | male.factor, data=p) # Unadjusted model: Classical and robust standard errors m2 <- glm(daysabs ~ male + offset(log(total)), data=p, family="poisson") summary(m2) coeftest(m2, vcov=sandwich) # Male adjusted for math scores m3 <- glm(daysabs ~ male + math + offset(log(total)), data=p, family="poisson") coeftest(m3, vcov=sandwich) # Male adjusted for language arts scores m4 <- glm(daysabs ~ male + langarts + offset(log(total)), data=p, family="poisson") coeftest(m4, vcov=sandwich) # Multivariable model with male, math scores, and language arts scores m5 <- glm(daysabs ~ male + math + langarts + offset(log(total)), data=p, family="poisson") coeftest(m5, vcov=sandwich) # Predictions in R male.pred <- c(0,1) math.pred <- mean(p$math) lang.pred <- 0:100 # Create a new dataframe that contains the covariates values that I want to make predictions for pnew <- expand.grid(male=male.pred, math=math.pred, langarts=lang.pred, total=100) # Make predictions from model m5 count.est <- predict(m5, newdata=pnew, type="response") # Plot of predictions plot(lang.pred, count.est[male.pred==0], type='l', col='Blue', ylab="Predicted number of days abscent", xlab="Language Arts score", ylim=c(0,20)) lines(lang.pred, count.est[male.pred==1], col='Red') legend("topright", inset=0.05, c("Females", "Males"), lty=1, col=c("Blue","Red"))