# Initial dataset manipulations

p <- read.table("http://biostat.mc.vanderbilt.edu/wiki/pub/Main/CourseBios312/students.csv", sep=",", header=TRUE)

p$total <- p$daysabs + p$daysatt
p$male.factor <- factor(p$male, levels=0:1, labels=c("Female", "Male"))

names(p)

# The libraries that I will use
# If you have never used one of these libraries, install first
library(rms)
library(sandwich)
library(lmtest)
library(lattice)

# Summary statistics
summary(~daysabs + total + math + langarts + male, data=p, method="reverse")
mean(p$daysabs)
sd(p$daysabs)
summary(male.factor~daysabs + total + math + langarts, data=p, method="reverse")

#Histogram
with(p, hist(daysabs, breaks=150, xlim=c(0,50), col="gray"), xlab="Days Absent")

# Histogram using the lattice library
histogram(~ daysabs | male.factor, data=p)

# Unadjusted model: Classical and robust standard errors
m2 <- glm(daysabs ~ male + offset(log(total)), data=p, family="poisson")
summary(m2)
coeftest(m2, vcov=sandwich)

# Male adjusted for math scores
m3 <- glm(daysabs ~ male + math + offset(log(total)), data=p, family="poisson")
coeftest(m3, vcov=sandwich)

# Male adjusted for language arts scores
m4 <- glm(daysabs ~ male + langarts + offset(log(total)), data=p, family="poisson")
coeftest(m4, vcov=sandwich)

# Multivariable model with male, math scores, and language arts scores
m5 <- glm(daysabs ~ male + math + langarts + offset(log(total)), data=p, family="poisson")
coeftest(m5, vcov=sandwich)

# Predictions in R
male.pred <- c(0,1)
math.pred <- mean(p$math)
lang.pred <- 0:100

# Create a new dataframe that contains the covariates values that I want to make predictions for
pnew <- expand.grid(male=male.pred, math=math.pred, langarts=lang.pred, total=100)

# Make predictions from model m5
count.est <- predict(m5, newdata=pnew, type="response")

# Plot of predictions
plot(lang.pred, count.est[male.pred==0], type='l', col='Blue', ylab="Predicted number of days abscent", xlab="Language Arts score", ylim=c(0,20))
lines(lang.pred, count.est[male.pred==1], col='Red')
legend("topright", inset=0.05, c("Females", "Males"), lty=1, col=c("Blue","Red"))