## load prostate data prostate <- read.csv("prostate.csv") ## subset to training examples prostate_train <- subset(prostate, train==TRUE) ## plot lcavol vs lpsa plot_psa_data <- function(dat=prostate_train) { plot(dat$lpsa, dat$lcavol, xlab="log Prostate Screening Antigen (psa)", ylab="log Cancer Volume (lcavol)", pch = 20) } plot_psa_data() ############################ ## regular linear regression ############################ ## L2 loss function L2_loss <- function(y, yhat) (y-yhat)^2 ## fit simple linear model using numerical optimization ## ... - arguments passed to los fit_lin <- function(y, x, loss=L2_loss, beta_init = c(-0.51, 0.75), ...) { ## function to compute training error err <- function(beta) mean(loss(y, beta[1] + beta[2]*x, ...)) ## find value of beta that minimizes training error beta <- optim(par = beta_init, fn = err) return(beta) } ## make predictions from linear model predict_lin <- function(x, beta) beta[1] + beta[2]*x ## fit linear model lin_beta <- fit_lin(y=prostate_train$lcavol, x=prostate_train$lpsa, loss=L2_loss) ## compute predictions for a grid of inputs x_grid <- seq(min(prostate_train$lpsa), max(prostate_train$lpsa), length.out=100) lin_pred <- predict_lin(x=x_grid, beta=lin_beta$par) ## plot data plot_psa_data() ## plot predictions lines(x=x_grid, y=lin_pred, col='darkgreen', lwd=2) ## do the same thing with 'lm' lin_fit_lm <- lm(lcavol ~ lpsa, data=prostate_train) ## make predictins using 'lm' object lin_pred_lm <- predict(lin_fit_lm, data.frame(lpsa=x_grid)) ## plot predictions from 'lm' lines(x=x_grid, y=lin_pred_lm, col='pink', lty=2, lwd=2) ################################## ## try modifying the loss function ################################## ## tilted absolute loss tilted_abs_loss <- function(y, yhat, tau) { d <- y-yhat ifelse(d > 0, d * tau, d * (tau - 1)) } custom_loss <- tilted_abs_loss ## plot custom loss function err_grd <- seq(-1,1,length.out=200) plot(err_grd, custom_loss(0, err_grd, tau=0.50), type='l', xlab='y-yhat', ylab='custom loss') ## fit linear model with custom loss lin_beta_custom <- fit_lin(y=prostate_train$lcavol, x=prostate_train$lpsa, loss=custom_loss, tau=0.25) lin_pred_custom <- predict_lin(x=x_grid, beta=lin_beta_custom$par) ## plot data plot_psa_data() ## plot predictions from L2 loss lines(x=x_grid, y=lin_pred, col='darkgreen', lwd=2) ## plot predictions from custom loss lines(x=x_grid, y=lin_pred_custom, col='pink', lwd=2, lty=2)