--- title: "Reduced Rank LDA - Vowel Data" author: "Matt Shotwell" date: "`r date()`" output: html_document --- ```{r fig.height=5, fig.width=9} library('ElemStatLearn') ## read vowel training and testing data from HTF website data(vowel.train); data(vowel.test) dat <- vowel.train tst <- vowel.test ## names of input variables inp <- paste("x", 1:10, sep=".") ## get input matrix inpx <- as.matrix(dat[inp]) ## center inputs globally inpx <- t(t(inpx) - colMeans(inpx)) ## compute within-cluster means cenx <- t(sapply(unique(dat$y), function(cls) colMeans(inpx[dat$y==cls,]))) ## compute pooled within-cluster var-cov sigx <- Reduce(`+`, lapply(unique(dat$y), function(cls) { x <- inpx[dat$y==cls,] x <- t(t(x)-colMeans(x)) t(x)%*%x }))/(nrow(dat)-11) ## 'sphere' the inputs and centers svdx <- svd(sigx) sphx <- inpx%*%svdx$v%*%diag(sqrt(1/svdx$d)) sphc <- cenx%*%svdx$v%*%diag(sqrt(1/svdx$d)) ## compute canonical inputs and centers svdc <- svd(sphc) canx <- sphx%*%svdc$v canc <- sphc%*%svdc$v ## compute discriminant functions in canonical variables ## assume pi same across classes (and thus omit from fn) ## z - canonical variables (nxp) ## mz - canonical center (1xp) ## k - number of canonical variables to use (k<=p) dz <- function(z, mz, k) { zc <- t(t(z[,1:k])-mz[1:k]) drop(zc^2%*%rep(1,k)) } ## compute discriminant functions in original variables dx <- function(x, mz, k){ ## compute 'sphere' inputs sphx <- x%*%svdx$v%*%diag(sqrt(1/svdx$d)) ## compute 'canonical' inputs canx <- sphx%*% svdc$v res <- dz(canx, drop(mz), k) attr(res, 'canx') <- canx return(res) } ## plot first two canonical inputs and means cols <- rainbow(11) par(mfrow=c(1,2)) plot(canx[,1], canx[,2], xlab="Canonical Input 1", ylab="Canonical Input 2", main="Training Vowel Data (HTF Fig. 4.11)", col=cols[dat$y]) points(canc[,1], canc[,2], col=cols, pch=19, cex=2) points(canc[,1], canc[,2], pch=13, cex=2, lwd=2) ## grid spanning first two canonical variables gsz <- 500 z1g <- seq(min(canx[,1]),max(canx[,1]),length.out=gsz) z2g <- seq(min(canx[,2]),max(canx[,2]),length.out=gsz) cang <- as.matrix(expand.grid(z1=z1g, z2=z2g)) ## compute values of 11 discriminate functions on grid dzl <- apply(canc, 1, function(mz) dz(cang, drop(mz), 2)) ## compute predicted classes on grid czl <- matrix(apply(dzl,1,which.min),gsz,gsz) ## for each pair of discriminant functions, approximate ## and plot their intersections, masking the intersection ## points that do not correspond to the largest discriminant ## function for(i in 1:10) { for(j in (i+1):11) { dfm <- matrix(dzl[,i]-dzl[,j],gsz,gsz) msk <- czl == i | czl == j dfm[!msk] <- NA cls <- contourLines(z1g, z2g, dfm, levels=0) for(cl in cls) lines(cl$x, cl$y, lwd=2) } } ## project test data onto first two canonical direction tstx <- as.matrix(tst[inp]) ## center according to training colMeans tstx <- t(t(tstx) - colMeans(dat[inp])) ## sphere and project onto canonical directions tstc <- tstx%*%svdx$v%*%diag(sqrt(1/svdx$d)) %*% svdc$v ## plot test data in first two canonical direction plot(tstc[,1], tstc[,2], xlab="Canonical Input 1", ylab="Canonical Input 2", main="Testing Vowel Data", col=cols[tst$y], xlim=range(canx[,1]), ylim=range(canx[,2])) points(canc[,1], canc[,2], col=cols, pch=19, cex=2) points(canc[,1], canc[,2], pch=13, cex=2, lwd=2) ## re-plot decision boundaries for(i in 1:10) { for(j in (i+1):11) { dfm <- matrix(dzl[,i]-dzl[,j],gsz,gsz) msk <- czl == i | czl == j dfm[!msk] <- NA cls <- contourLines(z1g, z2g, dfm, levels=0) for(cl in cls) lines(cl$x, cl$y, lwd=2) } } ``` ```{r, fig.height=5, fig.width=5} ## compute misclassification fraction using k ## canonical variables err <- function(dat0, k=2) { ## project test data onto first two canonical direction dat0x <- as.matrix(dat0[inp]) ## center according to training colMeans dat0x <- t(t(dat0x) - colMeans(dat[inp])) ## sphere and project onto canonical directions dat0c <- dat0x%*%svdx$v%*%diag(sqrt(1/svdx$d)) %*% svdc$v ## compute discriminant functions dzl <- apply(canc, 1, function(mz) dz(dat0c, drop(mz), k=k)) ## classify to smallest discriminant function cls <- apply(dzl,1,which.min) ## compute misclassification rate mean(cls != dat0$y) } trn_err <- sapply(1:10, function(k) err(dat, k)) tst_err <- sapply(1:10, function(k) err(tst, k)) plot(c(1,10), range(c(trn_err,tst_err)), type="n", xlab="Number of Canonical Inputs (k)", ylab="Misclassification Rate", main="Vowel Training/Testing Error (HTF Fig. 4.10)") points(1:10, trn_err, pch=1, type="b") points(1:10, tst_err, pch=4, type="b") legend("topright", legend=c("Training", "Testing"), pch=c(1,4), lty=1, bty="n") ```