library('MASS') ## for 'mcycle' library('manipulate') ## for 'manipulate' y <- mcycle$accel x <- mcycle$times plot(x, y, xlab="Time (ms)", ylab="Acceleration (g)") ## x - n x p matrix of training inputs ## x0 - 1 x p input where to make prediction ## lambda - bandwidth (neighborhood size) kernel_epanechnikov <- function(x, x0, lambda=1) { d <- function(t) ifelse(t <= 1, 3/4*(1-t^2), 0) z <- t(t(x) - x0) d(sqrt(rowSums(z*z))/lambda) } kernel_k_nearest_neighbors <- function(x, x0, k=1) { ## compute distance betwen each x and x0 z <- t(t(x) - x0) d <- sqrt(rowSums(z*z)) ## initialize kernel weights to zero w <- rep(0, length(d)) ## set weight to 1 for k nearest neighbors w[order(d)[1:k]] <- 1 return(w) } ## y - n x 1 vector of training outputs ## x - n x p matrix of training inputs ## x0 - m x p matrix where to make predictions ## kern - kernel function to use ## ... - arguments to pass to kernel function nadaraya_watson <- function(y, x, x0, kern, ...) { apply(x0, 1, function(x0_) { k <- kern(x, x0_, ...) sum(k*y)/sum(k) }) } ## create a grid of inputs x_plot <- matrix(seq(min(x),max(x),length.out=100),100,1) ## make predictions using NW method at each of grid points y_hat_plot <- nadaraya_watson(y, x, x_plot, kernel_epanechnikov, lambda=1) ## plot predictions plot(x, y, xlab="Time (ms)", ylab="Acceleration (g)") lines(x_plot, y_hat_plot, col="#882255", lwd=2) ## how does lambda affect shape of predictor? manipulate({ y_hat_plot <- nadaraya_watson(y, x, x_plot, kern=kernel_epanechnikov, lambda=lambda_slider) plot(x, y, xlab="Time (ms)", ylab="Acceleration (g)") lines(x_plot, y_hat_plot, col="#882255", lwd=2) }, lambda_slider=slider(0.1, 100, initial=1)) ## how does k affect shape of predictor using k-nn kernel? manipulate({ y_hat_plot <- nadaraya_watson(y, x, x_plot, kern=kernel_k_nearest_neighbors, k=k_slider) plot(x, y, xlab="Time (ms)", ylab="Acceleration (g)") lines(x_plot, y_hat_plot, col="#882255", lwd=2) }, k_slider=slider(1, 100, initial=1, step=1)) ## local constant and local linear using 'lm' local_linear <- function(y, x, x0, form=y~1, kern, ...) { apply(x0, 1, function(x0_) { w <- kern(x, x0_, ...) dat <- data.frame(y=y,x=x,w=w) fit <- lm(formula=form, weights=w, data=dat) suppressWarnings(predict(fit, data.frame(x=x0_))) }) } ## Show NW (local constant) vs. local linear y_hat_plot <- local_linear(y, x, x_plot, form = y ~ 1, kern = kernel_epanechnikov, lambda = 5) plot(x, y, xlab="Time (ms)", ylab="Acceleration (g)") lines(x_plot, y_hat_plot, col="#882255", lwd=2) y_hat_plot <- nadaraya_watson(y, x, x_plot, kern = kernel_epanechnikov, lambda = 5) lines(x_plot, y_hat_plot, col="yellow", lwd=2, lty=2) y_hat_plot <- local_linear(y, x, x_plot, form = y ~ x, kern = kernel_epanechnikov, lambda = 5) lines(x_plot, y_hat_plot, col="darkgreen", lwd=2, lty=1) legend('topleft', bty='n', lwd=2, legend=c('NW','local constant', 'local linear'), col=c('yellow', '#882255', 'darkgreen')) ## local quadratic model y_hat_plot <- local_linear(y, x, x_plot, form = y ~ poly(x, 2), kern = kernel_epanechnikov, lambda = 5) plot(x, y, xlab="Time (ms)", ylab="Acceleration (g)") lines(x_plot, y_hat_plot, col="#882255", lwd=2, lty=1) ## local natural cubic splines model y_hat_plot <- local_linear(y, x, x_plot, form = y ~ ns(x, 3), kern = kernel_epanechnikov, lambda = 5) plot(x, y, xlab="Time (ms)", ylab="Acceleration (g)") lines(x_plot, y_hat_plot, col="#882255", lwd=2, lty=1)