--- title: "mLR - Vowel Data" author: "Matt Shotwell" date: "`r date()`" output: html_document --- ```{r fig.height=5, fig.width=9} library('ElemStatLearn') library('nnet') library('dplyr') library('magrittr') ## read vowel training and testing data from HTF website data(vowel.train) data(vowel.test) vowel.train %<>% mutate(y=as.factor(y)) vowel.test %<>% mutate(y=as.factor(y)) ## store training input column means and center inputs train.input.means <- vowel.train %>% select(starts_with('x')) %>% colMeans vowel.train <- cbind( vowel.train %>% select(y), vowel.train %>% select(-y) %>% scale(center=train.input.means, scale=F)) vowel.test <- cbind( vowel.test %>% select(y), vowel.test %>% select(-y) %>% scale(center=train.input.means, scale=F)) ## multinomial logistic regression fit <- multinom(y ~ ., data=vowel.train) ## functions to compute testing/training error ## y - n-by-K target coded outcome matrix ## prob - n-by-K probability matrix zero_one_loss <- function(y, prob) ifelse(apply(y, 1, which.max) == apply(prob, 1, which.max), 0, 1) ## y - n-by-K target coded outcome matrix ## prob - n-by-K probability matrix cross_entropy_loss <- function(y, prob) -rowSums(y * log(prob)) ## compute training/testing error error <- function(dat, fit, loss=zero_one_loss) { ## compute probabilities from model prob <- predict(fit, newdata=dat, type='probs') ## target coding of outcome y <- model.matrix(~0+y, data=dat) mean(loss(y, prob)) } ## do principal components regression with LR svdx <- svd(vowel.train %>% select(-y)) yz_train <- cbind( vowel.train %>% select(y), (vowel.train %>% select(-y) %>% as.matrix) %*% svdx$v) yz_test <- cbind( vowel.test %>% select(y), (vowel.test %>% select(-y) %>% as.matrix) %*% svdx$v) ## fit with first 10:1 PCs # nPC = 0 leaves only outcome seqPC <- 10:0 pcr_fits <- lapply(seqPC, function(nPC) multinom(y ~ ., data=yz_train %>% select(c(1+0:nPC)))) ## compute training and testing errors as function of lambda pcr_err_train <- sapply(pcr_fits, function(fit) error(yz_train, fit)) pcr_err_test <- sapply(pcr_fits, function(fit) error(yz_test, fit)) ## plot test/train error plot(x=range(seqPC), y=range(c(pcr_err_train, pcr_err_test)), type='n', xlab='# PCs used', ylab='train/test error', main='Zero-one loss') points(seqPC, pcr_err_train, pch=19, type='b', col='darkblue') points(seqPC, pcr_err_test, pch=19, type='b', col='darkred') legend('topright', c('train','test'), lty=1, pch=19, col=c('darkblue','darkred'), bty='n') ## compute training and testing errors as function of lambda pcr_err_train <- sapply(pcr_fits, function(fit) error(yz_train, fit, cross_entropy_loss)) pcr_err_test <- sapply(pcr_fits, function(fit) error(yz_test, fit, cross_entropy_loss)) ## plot test/train error plot(x=range(seqPC), y=range(c(pcr_err_train, pcr_err_test)), type='n', xlab='# PCs used', ylab='train/test error', main='Cross-entropy loss') points(seqPC, pcr_err_train, pch=19, type='b', col='darkblue') points(seqPC, pcr_err_test, pch=19, type='b', col='darkred') legend('topright', c('train','test'), lty=1, pch=19, col=c('darkblue','darkred'), bty='n') ```