library('splines')
library('boot')
library('rpart')
set.seed(42)

col1 <- c("#882255","#332288", "#44AA99", "#CC6677", "#007733")

dat <- read.csv("nonlinear-bagging.csv")

## linear predictor (b-splines)
fit_bs <- function(dat, idx=1:nrow(dat),
    knots=c(0.638, 1.020, 1.948, 2.498),
    bknots = c(0.01, 2.99)) {
  pxv <- seq(min(dat$x),max(dat$x),length.out=100)
  dat <- dat[idx,]
  fit <- lm(y ~ bs(x, knots=knots, Boundary.knots=bknots), data=dat)
  #fit <- lm(y ~ x, data=dat)
  pre <- suppressWarnings(predict(fit, data.frame(x=pxv)))
  attr(pre, 'x') <- pxv
  return(pre)
}
boo_bs <- boot(dat, fit_bs, R=1000)

## tree-based (nonlinear) predictor
fit_tr <- function(dat, idx=1:nrow(dat)) {
  pxv <- seq(min(dat$x),max(dat$x),length.out=100)  
  dat <- dat[idx,]
  fit <- rpart(y ~ x, method='anova', data=dat, 
               control=rpart.control(minsplit=5))
  pre <- predict(fit, data.frame(x=pxv))
  attr(pre, 'x') <- pxv
  return(pre)
}

boo_tr <- boot(dat, fit_tr, R=1000)
plot(dat)

pre_tr <- fit_tr(dat)

lines(attr(pre_tr, 'x'), pre_tr, lwd=2, col=col1[3])
lines(attr(pre_tr, 'x'), colMeans(boo_tr$t), lwd=2, col=col1[4])
legend("topleft", c("Tree original",
                    "Tree bagged (B=2000)"),
       lwd=2, col=col1[3:4], bty="n")

plot(dat)
pre_bs <- fit_bs(dat)
lines(attr(pre_bs, 'x'), pre_bs, lwd=2, col=col1[1])
lines(attr(pre_bs, 'x'), colMeans(boo_bs$t), lwd=2, col =col1[2])
legend("topleft", c("Splines original",
                    "Splines bagged (B=2000)"),
       lwd=2, col=col1[1:2], bty="n")