library('MASS')       ## for 'mcycle'
library('manipulate') ## for 'manipulate'

y <- mcycle$accel
x <- matrix(mcycle$times, length(mcycle$times), 1)
plot(x, y, xlab="Time (ms)", ylab="Acceleration (g)")

## Epanechnikov kernel
## x      - n x p matrix of training inputs
## x0     - 1 x p input where to make prediction
## lambda - bandwidth (neighborhood size)
kernel_epanechnikov <- function(x, x0, lambda=1) {
  d <- function(t)
    ifelse(t <= 1, 3/4*(1-t^2), 0)
  z <- t(t(x) - x0)
  d(sqrt(rowSums(z*z))/lambda)
}

## k-nearest neighbors kernel
## x  - n x p matrix of training inputs
## x0 - 1 x p input where to make prediction
## k  - number of nearest neighbors
kernel_k_nearest_neighbors <- function(x, x0, k=1) {
  ## compute distance between each x and x0
  z <- t(t(x) - x0)
  d <- sqrt(rowSums(z*z))
  ## initialize kernel weights to zero
  w <- rep(0, length(d))
  ## set weight to 1 for k nearest neighbors
  w[order(d)[1:k]] <- 1
  return(w)
}

## Nadaraya-Watson estimator
## y    - n x 1 vector of training outputs
## x    - n x p matrix of training inputs
## x0   - m x p matrix where to make predictions
## kern - kernel function to use
## ...  - arguments to pass to kernel function
nadaraya_watson <- function(y, x, x0, kern, ...) {
  ## m x n matrix of normalized kernel weights (the smoother matrix)
  k <- t(apply(x0, 1, function(x0_) {
    k_ <- kern(x, x0_, ...)
    k_/sum(k_)
  }))
  yhat <- drop(k %*% y)
  attr(yhat, 'k') <- k
  return(yhat)
}

## helper function to view kernel (smoother) matrix
matrix_image <- function(x) {
  rot <- function(x) t(apply(x, 2, rev))
  cls <- rev(gray.colors(20, end=1))
  image(rot(x), col=cls, axes=FALSE)
  xlb <- pretty(1:ncol(x))
  xat <- (xlb-0.5)/ncol(x)
  ylb <- pretty(1:nrow(x))
  yat <- (ylb-0.5)/nrow(x)
  axis(3, at=xat, labels=xlb)
  axis(2, at=yat, labels=ylb)
  mtext('Rows', 2, 3)
  mtext('Columns', 3, 3)
}

## effective degrees of freedom (trace of the smoother matrix)
## y    - n x 1 vector of training outputs
## x    - n x p matrix of training inputs
## kern - kernel function to use
## ...  - arguments to pass to kernel function
effective_df <- function(y, x, kern, ...) {
  y_hat <- nadaraya_watson(y, x, x, kern=kern, ...)
  sum(diag(attr(y_hat, 'k')))
}

## squared-error loss
## y    - train/test y
## yhat - predictions at train/test x
loss_squared_error <- function(y, yhat)
  (y - yhat)^2

## test/train error
## y    - train/test y
## yhat - predictions at train/test x
## loss - loss function
error <- function(y, yhat, loss=loss_squared_error)
  mean(loss(y, yhat))

## AIC
## y    - training y
## yhat - predictions at training x
## d    - effective degrees of freedom
aic <- function(y, yhat, d)
  error(y, yhat) + 2/length(y)*d

## BIC
## y    - training y
## yhat - predictions at training x
## d    - effective degrees of freedom
bic <- function(y, yhat, d)
  error(y, yhat) + log(length(y))/length(y)*d

## make predictions using NW method at training inputs
y_hat <- nadaraya_watson(y, x, x, kernel_epanechnikov, lambda=5)

## view kernel (smoother) matrix
matrix_image(attr(y_hat, 'k'))

## compute effective degrees of freedom
edf <- effective_df(y, x, kernel_epanechnikov, lambda=5)
aic(y, y_hat, edf)
bic(y, y_hat, edf)

## create a grid of inputs
x_plot <- matrix(seq(min(x), max(x), length.out=100), 100, 1)

## make predictions using NW method at each of the grid points
## (note: with a small bandwidth like lambda=1, grid points with no
## training input within lambda have kernel weights summing to zero,
## so the prediction there is NaN and shows as a gap in the line)
y_hat_plot <- nadaraya_watson(y, x, x_plot, kernel_epanechnikov, lambda=1)

## plot predictions
plot(x, y, xlab="Time (ms)", ylab="Acceleration (g)")
lines(x_plot, y_hat_plot, col="#882255", lwd=2)
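
## As a quick illustration (not part of the original exercise), sweep the
## Epanechnikov bandwidth and compare eff. df, AIC, and BIC at the training
## inputs; the 'lambdas' grid below is an arbitrary choice. A smaller lambda
## gives a more flexible fit, hence a larger eff. df.
lambdas <- c(1, 2, 5, 10, 20)
sapply(lambdas, function(l) {
  y_hat_l <- nadaraya_watson(y, x, x, kernel_epanechnikov, lambda=l)
  edf_l   <- effective_df(y, x, kernel_epanechnikov, lambda=l)
  c(lambda=l, edf=edf_l,
    aic=aic(y, y_hat_l, edf_l),
    bic=bic(y, y_hat_l, edf_l))
})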
## how does k affect the shape of the predictor and the eff. df
## when using the k-nearest neighbors kernel?
manipulate({
  ## make predictions using NW method at training inputs
  y_hat <- nadaraya_watson(y, x, x,
    kern=kernel_k_nearest_neighbors, k=k_slider)
  edf <- effective_df(y, x,
    kern=kernel_k_nearest_neighbors, k=k_slider)
  aic_ <- aic(y, y_hat, edf)
  bic_ <- bic(y, y_hat, edf)
  y_hat_plot <- nadaraya_watson(y, x, x_plot,
    kern=kernel_k_nearest_neighbors, k=k_slider)
  plot(x, y, xlab="Time (ms)", ylab="Acceleration (g)")
  legend('topright', legend = c(
    paste0('eff. df = ', round(edf, 1)),
    paste0('aic = ', round(aic_, 1)),
    paste0('bic = ', round(bic_, 1))),
    bty='n')
  lines(x_plot, y_hat_plot, col="#882255", lwd=2)
}, k_slider=slider(1, 5, initial=3, step=1))
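
## A parallel sketch (not in the original exercise): how does the bandwidth
## lambda affect the shape of the Epanechnikov predictor and the eff. df?
## 'lambda_slider' and its range are illustrative choices; the minimum is
## set to 2 so every grid point has at least one training input in range.
manipulate({
  y_hat <- nadaraya_watson(y, x, x,
    kern=kernel_epanechnikov, lambda=lambda_slider)
  edf <- effective_df(y, x,
    kern=kernel_epanechnikov, lambda=lambda_slider)
  aic_ <- aic(y, y_hat, edf)
  bic_ <- bic(y, y_hat, edf)
  y_hat_plot <- nadaraya_watson(y, x, x_plot,
    kern=kernel_epanechnikov, lambda=lambda_slider)
  plot(x, y, xlab="Time (ms)", ylab="Acceleration (g)")
  legend('topright', legend = c(
    paste0('eff. df = ', round(edf, 1)),
    paste0('aic = ', round(aic_, 1)),
    paste0('bic = ', round(bic_, 1))),
    bty='n')
  lines(x_plot, y_hat_plot, col="#882255", lwd=2)
}, lambda_slider=slider(2, 20, initial=5, step=1))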