library('splines')
library('boot')
library('rpart')
set.seed(42)
col1 <- c("#882255","#332288", "#44AA99", "#CC6677", "#007733")
dat <- read.csv("nonlinear-bagging.csv")
## linear predictor (b-splines)
fit_bs <- function(dat, idx=1:nrow(dat),
knots=c(0.638, 1.020, 1.948, 2.498),
bknots = c(0.01, 2.99)) {
pxv <- seq(min(dat$x),max(dat$x),length.out=100)
dat <- dat[idx,]
fit <- lm(y ~ bs(x, knots=knots, Boundary.knots=bknots), data=dat)
#fit <- lm(y ~ x, data=dat)
pre <- suppressWarnings(predict(fit, data.frame(x=pxv)))
attr(pre, 'x') <- pxv
return(pre)
}
boo_bs <- boot(dat, fit_bs, R=1000)
## tree-based (nonlinear) predictor
fit_tr <- function(dat, idx=1:nrow(dat)) {
pxv <- seq(min(dat$x),max(dat$x),length.out=100)
dat <- dat[idx,]
fit <- rpart(y ~ x, method='anova', data=dat,
control=rpart.control(minsplit=5))
pre <- predict(fit, data.frame(x=pxv))
attr(pre, 'x') <- pxv
return(pre)
}
boo_tr <- boot(dat, fit_tr, R=1000)
plot(dat)
pre_tr <- fit_tr(dat)
lines(attr(pre_tr, 'x'), pre_tr, lwd=2, col=col1[3])
lines(attr(pre_tr, 'x'), colMeans(boo_tr$t), lwd=2, col=col1[4])
legend("topleft", c("Tree original",
"Tree bagged (B=2000)"),
lwd=2, col=col1[3:4], bty="n")
plot(dat)
pre_bs <- fit_bs(dat)
lines(attr(pre_bs, 'x'), pre_bs, lwd=2, col=col1[1])
lines(attr(pre_bs, 'x'), colMeans(boo_bs$t), lwd=2, col =col1[2])
legend("topleft", c("Splines original",
"Splines bagged (B=2000)"),
lwd=2, col=col1[1:2], bty="n")