---
title: "MLR with delta mehod"
author: "M. Shotwell"
date: "February 2, 2018"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

### Function to compute finite difference gradients
```{r}
## compute gradient of probability function for a sequence of c1 at median c2
## compute finite-difference gradient (c.f., nlme::fdHess)
fdGrad <- function (pars, fun, ...,
                    .relStep = (.Machine$double.eps)^(1/2), 
                    minAbsPar = 0) {
  ##pars <- as.numeric(pars)
  npar <- length(pars)
  incr <- ifelse(abs(pars) <= minAbsPar, .relStep, 
                 (abs(pars)-minAbsPar) * .relStep)
  ival <- do.call(fun, list(pars, ...))
  diff <- rep(0,npar)
  sapply(1:npar, function(i) {
    del <- rep(0,npar)
    del[i] <- incr[i]
    (do.call(fun, list(pars+del, ...))-ival)/incr[i]
  })
}
```

### Compue canonical variables from RRLDA
```{r}

library('ElemStatLearn')
library('nnet')

## read vowel training and testing data from HTF website
data(vowel.train); data(vowel.test)
dat <- vowel.train
tst <- vowel.test

## compute canonical variables from reduced rank LDA
## do logistic regression with first two canonidal variables

## names of input variables      
inp <- paste("x", 1:10, sep=".")

## get input matrix
inpx <- as.matrix(dat[inp])

## center inputs globally
inpx <- t(t(inpx) - colMeans(inpx)) 

## compute within-cluster means
cenx <- t(sapply(unique(dat$y), function(cls) 
  colMeans(inpx[dat$y==cls,])))

## compute pooled within-cluster var-cov
sigx <- Reduce(`+`, lapply(unique(dat$y), function(cls) {
  x <- inpx[dat$y==cls,]
  x <- t(t(x)-colMeans(x))
  t(x)%*%x
}))/(nrow(dat)-11)

## 'sphere' the inputs and centers
svdx <- svd(sigx)
sphx <- inpx%*%svdx$v%*%diag(sqrt(1/svdx$d))
sphc <- cenx%*%svdx$v%*%diag(sqrt(1/svdx$d))

## compute canonical inputs and centers
svdc <- svd(sphc)
canx <- sphx%*%svdc$v
canc <- sphc%*%svdc$v

canx_df <- data.frame(canx)
colnames(canx_df) <- paste0('c',1:10)
dat <- cbind(dat, canx_df)

```

```{r}

## fit (multinomial) logistic regression
fit <- multinom(y ~ c1 + c2, data=dat, Hess=TRUE)
summary(fit)


## compute class membership probabilities
mlrp <- function(x, beta) {
  xb <- x %*% t(beta)
  pr <- exp(xb)/(1+rowSums(exp(xb)))
  pr <- cbind('1'=1-rowSums(pr), pr)
  return(pr)
}

## compute approximate variance-covariance of \hat{\beta}
vcv_beta <- solve(fit$Hessian)

## function compute probability of class 1 membership as funciton of c1 and c2
mlrp_pr1 <- function(beta) 
  mlrp(model.matrix(~c1+c2, pre_dat), beta)[,1]

## generate sequence of values of c1 at 5th percentile of c2
pre_dat <- data.frame(c1=seq(min(dat$c1), max(dat$c1), length.out=200),
                      c2=quantile(dat$c2, 0.05))

## show region of interest
plot(dat$c1, dat$c2, col=dat$y, 
     xlab="Canonical Variable 1",
     ylab="Canonical Variable 2")
abline(h=quantile(dat$c2, 0.05), lwd=2, lty=2)


## compute FD gradient of probability function at estimate of beta
pre_grd <- fdGrad(coef(fit), mlrp_pr1)

## compute approximate variance covariance of probabilities
vcv_pr1 <- pre_grd%*%vcv_beta%*%t(pre_grd)

## compute probabilities with approximate 95% CI using delta method
pre_dat$pr1_est <- mlrp_pr1(coef(fit))
pre_dat$pr1_lo95 <- pmax(0, pre_dat$pr1_est - qnorm(0.975)*sqrt(diag(vcv_pr1)))
pre_dat$pr1_hi95 <- pmin(1, pre_dat$pr1_est + qnorm(0.975)*sqrt(diag(vcv_pr1)))

## plot probabilities
plot(pre_dat$c1, mlrp_pr1(coef(fit)), 
     ylim=range(c(pre_dat$pr1_lo95,pre_dat$pr1_hi95)),
     xlab='Canonical Variable 1', 
     ylab='Probability of Class 1', 
     type='l', lwd=2)
polygon(c(pre_dat$c1, rev(pre_dat$c1)),
        c(pre_dat$pr1_hi95, rev(pre_dat$pr1_lo95)),density = 15)


```