########################################### ## 'Orange' data; plot and fit linear model ########################################### ## plot data with(Orange, plot(age, circumference, pch=19, xlab="Age (days since 1968/12/31)", ylab="Circumference (mm)")) legend('topleft', c('linear (likelihood)', 'linear (Bayesian)', 'nonlinear (likelihood)', 'nonlinear (likelihood)'), lwd=2, col=c('#117733', '#999933','#882255','#332288'), bty='n') ## fit simple linear model library('splines') fit <- lm(circumference ~ ns(age,4), data=Orange) ## plot model fit xrn <- range(Orange$age) xpl <- seq(xrn[1], xrn[2], length.out=100) xdf <- data.frame(circumference=0,age=xpl) lines(xpl, model.matrix(fit, data=xdf)%*%coef(fit), col="#117733", lwd=2) ## compute MLE for error s.d. n <- length(fit$residuals) sig <- sqrt(var(fit$residuals)*(n-1)/n) ## all parameters of linear model par <- c(coef(fit), c(sigma=sig)) ## function to compute log likelihood (llk) for linear model llk <- function(par) { ## get model matrix mmx <- model.matrix(fit) ## get original outcome y <- fit$fitted.values + fit$residuals ## compute model predictions pre <- mmx %*% par[1:(length(par)-1)] ## compute log likelihood sum(dnorm(y, pre, sd=par[length(par)], log=TRUE)) } ## check that built-in 'logLik' matches 'llk' logLik(fit) llk(par) ## compute gradient and Hessian library('numDeriv') G <- grad(llk, par) H <- hessian(llk, par) ## compare with bootstrap 95% CI for sigma library('boot') sig_boot_fn <- function(dat, idx) { fit <- lm(circumference ~ age, data=dat[idx,]) n <- length(fit$residuals) sig <- sqrt(var(fit$residuals)*(n-1)/n) } boot.ci(boot(Orange, sig_boot_fn, R=10000), type = 'perc') ## compute approximate 95% confidence bands ## function to compute predictions g <- function(par) model.matrix(fit, data=xdf) %*% par[1:(length(par)-1)] ## compute gradient of prediction w.r.t. parameters Gg <- jacobian(g, par) ## compute variance-covariance of g Vg <- Gg %*% solve(-H) %*% t(Gg) ## compute lower and upper bound Lg <- g(par) - qnorm(0.975)*sqrt(diag(Vg)) Ug <- g(par) + qnorm(0.975)*sqrt(diag(Vg)) ## plot confidence band lines(xpl, Lg, lty=2, col='#117733', lwd=2) lines(xpl, Ug, lty=2, col='#117733', lwd=2) ################################### ## Bayesian version of linear model ################################### ## log prior density lp0 <- function(par) sum(dnorm(par, 0, 50, log=TRUE)) ## log posterior density (use llk from above) lp1 <- function(par) lp0(par) + llk(par) ## compute maximum a posteriori estimate map <- optim(par, lp1, hessian = TRUE, control=list(fnscale=-1, maxit=10000)) ## compute approximate 95% credible bands ## function to compute predictions g <- function(par) model.matrix(fit, data=xdf) %*% par[1:(length(par)-1)] ## compute gradient of prediction w.r.t. parameters Gg <- jacobian(g, map$par) ## compute variance-covariance of g Vg <- Gg %*% solve(-map$hessian) %*% t(Gg) ## compute lower and upper bound Lg <- g(map$par) - qnorm(0.975)*sqrt(diag(Vg)) Ug <- g(map$par) + qnorm(0.975)*sqrt(diag(Vg)) ## plot confidence band lines(xpl, g(map$par), lty=1, col='#999933', lwd=2) lines(xpl, Lg, lty=2, col='#999933', lwd=2) lines(xpl, Ug, lty=2, col='#999933', lwd=2) ################################### ## nonlinear model (logistic curve) ################################### ## fit nonlinear model fit_nl <- nls(circumference ~ SSlogis(age, Asym, xmid, scal), data=Orange) ## compute and plot model predictions gg <- predict(fit_nl, xdf) lines(xpl, gg, col='#882255', lwd=2) ## get gradient of prediction w.r.t. parameters Gg <- attr(gg, 'gradient') ## compute variance-covariance of g; 'vcov(fit)' gives 'solve(-H)' Vg <- Gg %*% vcov(fit_nl) %*% t(Gg) ## compute lower and upper bound Lg <- gg - qnorm(0.975)*sqrt(diag(Vg)) Ug <- gg + qnorm(0.975)*sqrt(diag(Vg)) ## plot confidence band lines(xpl, Lg, lty=2, col='#882255', lwd=2) lines(xpl, Ug, lty=2, col='#882255', lwd=2) ## estimate the age at which the average circumference is 100 par <- coef(fit_nl) g <- function(par) uniroot(function(x) 100-SSlogis(x, par[1], par[2], par[3]), c(0,1500))$root ## find x s.t. logistic function takes value 100 ## compute gradient of prediction w.r.t. parameters Gg <- grad(g, par) ## compute variance-covariance of g; 'vcov(fit)' gives 'solve(-H)' Vg <- Gg %*% vcov(fit_nl) %*% Gg ## compute lower and upper bound Lg <- g(par) - qnorm(0.975)*sqrt(diag(Vg)) Ug <- g(par) + qnorm(0.975)*sqrt(diag(Vg)) ## plot confidence interval abline(v=c(g(par),Lg,Ug), col='#332288', lwd=2, lty=c(1,2,2)) abline(h=100, col='#332288', lwd=1, lty=3) axis(1, at=c(g(par),Lg,Ug), labels=round(c(g(par),Lg,Ug)))