Miscellaneous Examples of S Code for Spline Fits
Note: These examples assume that the
Hmisc
and
Design
packages are attached.
Linear Spline Basis Functions and Variance Inflation Factors
par(mfrow=c(3,2))
a <- c(5,50,95)
a <- c(10,50,90)
cat("knot=",a,"\n")
x <- 1:100
set.seed(1)
y <- matxv(lsp(x,c(20,40,70)),c(.2,-.4,.3,-.5))+2*rnorm(100)
describe(y)
doit <- function(w, center=F) {
if(center)note("centered") else note("uncentered")
if(center) {
wbar <- apply(w,2,mean)
w <- sweep(w,2,wbar)
}
w <- cbind(1,w)
xx <- t(w) %*% w
d <- diag(xx)^.5
cm <- xx/(d %o% d)
print(cm)
vif <- diag(solve(cm))
print(vif)
f <- lsfit(w,y,intercept=F)
ls.print(f)
matxv(w,f$coef)
}
fitted <- NULL
w <- lsp(x,a)
note("default lspline")
fitted <- cbind(fitted,doit(w,F))
fitted <- cbind(fitted,doit(w,T))
w <- cbind(pmin(x,a[1]),
pmin(pmax(x-a[1],0),a[2]-a[1]),
pmin(pmax(x-a[2],0),a[3]-a[2]),
pmax(x-a[3],0))
note("double truncation")
fitted <- cbind(fitted,doit(w,F))
fitted <- cbind(fitted, doit(w,T))
print(fitted)
do <- function(i,j)max(abs(fitted[,i]-fitted[,j]))
dif <- max(do(1,2),do(1,3),do(1,4),do(2,3),do(2,4),do(3,4))
cat("Maximum abs difference:",dif,"\n")
Fit Complex Function, Impact of Randomizing Knot Locations
n<-500
x <- rnorm(n)
logit <- x-1.5*sin(x)+.7*abs(x)+exp(-x)/10
y <- ifelse(runif(n)<=1./(1.+exp(-logit)),1,0)
print(c("n=",n," d=",sum(y)),quote=F)
par(err=-1)
rcspline.plot(x,y,plotcl=F)
points(x,logit)
#smfit<-lowess(x,y,f=.50)
#points(smfit$x,log(smfit$y/(1-smfit$y)))
lower <- quantile(x,.05)
upper <- quantile(x,.95)
for(i in 1:50){
knots<-sort(c(lower,upper,rnorm(3)))
print(round(as.single(knots),2))
rcspline.plot(x,y,knots=knots,addplot=T,plotcl=F,showknots=F)
Effect of Varying Number of Knots in Ability to Fit a Complex Function using Logistic Regression
dologit<-function(x, logit, y, nk = 5, show = "xbeta", xlim = range(x), supsmu=T,
add=F,lty=1) {
if(show == "xbeta") {
if(!add)plot(x, logit, type="n",xlab="X",ylab="log Odds")
if(!add)lines(x, logit, lwd = 3)
if(supsmu)lines(logitl(supsmu(x, y)), lty = 2)
} else {
if(!add)plot(x, p, type = "l", lwd = 3)
if(supsmu)lines(supsmu(x, y), lty = 2)
}
rcspline.plot(x, y, nk = nk, show = show, add = T, xrange = xlim,
plotcl=F,showknots=F,lty=lty)
}
set.seed(13)
n <- 1500
x <- runif(n)
logit <- 2/(1+(8*abs(x-.45))^1.8)-1
x <- x[!(is.na(x) | is.na(logit))]
logit <- logit[!(is.na(x) | is.na(logit))]
xx <- order(x, x)
x <- x[xx]
logit <- logit[xx]
p <- 1/(1 + exp( - logit))
y <- ifelse(runif(length(x)) <= p, 1, 0)
oldpar<-par(mar=c(6,4,4,2)+.1)
for(i in 3:8) {
dologit(x,logit,y,nk=i,supsmu=F,add=i>3,lty=i-2)
cat("Place marks for curve for",i,"knots with mouse\n")
text(locator(),i,cex=.5)
}
title(paste("Figure 1: Effect of k for n=1500, d=",sum(y),sep=""))
title(sub="Numbers are values of k.\nBold curve is true function.",adj=0)
par(oldpar)
Effect of Varying Number of Knots in Ability to Fit a Complex Function using Logistic Regression, Example 2
dologit<-function(x, logit, y, nk = 5, show = "xbeta", xlim = range(x), supsmu=F,
add=F,lty=1,knots) {
if(show == "xbeta") {
if(!add)plot(x, logit, type="n",xlab="",ylab="", ylim=c(-1.5,2))
if(!add)lines(x, logit, lwd = 3)
if(supsmu)lines(logitl(supsmu(x, y)), lty = 2)
} else {
if(!add)plot(x, p, type = "l", lwd = 3)
if(supsmu)lines(supsmu(x, y), lty = 2)
}
rcspline.plot(x, y, nk = nk, show = show, add = T, xrange = xlim,
plotcl=F,showknots=T,lty=lty,knots=knots)
}
set.seed(23)
n <- 500
x <- runif(n)
logit <- 2/(1+(8*abs(x-.45))^1.8)-1
x <- x[!(is.na(x) | is.na(logit))]
logit <- logit[!(is.na(x) | is.na(logit))]
xx <- order(x, x)
x <- x[xx]
logit <- logit[xx]
p <- 1/(1 + exp( - logit))
y <- ifelse(runif(length(x)) <= p, 1, 0)
cat("d=",sum(y))
oldpar<-par(mfrow=c(4,4),oma=c(0,0,4,0))
set.seed(517)
figure <- 3
for(i in 1:16) {
if(figure==2)knots<-sort(runif(5))
else knots<-c(runif(1,.01,.1625),runif(1,.1625,.3875),
runif(1,.3875,.6125),
runif(1,.6125,.8375),runif(1,.8375,.99))
print(knots)
dologit(x,logit,y,nk=5,supsmu=F,add=F,knots=knots)
}
if(figure==2)mtext(side=3,line=0,cex=1.5,outer=T,
paste("Figure 2: Effect of Knot Locations for n=500, d=",sum(y),
"\n5 Knots, Uniform(0,1)",sep=""))
if(figure==3)mtext(side=3,line=0,cex=1.5,outer=T,
paste("Figure 3: Effect of Knot Locations for n=500, d=",sum(y),
"\n5 Knots, Restricted Distribution",sep=""))
cat("click left mouse button after selecting print")
locator(1)
par(oldpar)
Spline Fits vs. Stratified Means
set.seed(1)
x <- seq(60)
true.reg <- x^2/300 - x/20 + 2
plot(x,true.reg,type="l",xlab="X",ylab="Y",ylim=c(0,14))
y <- true.reg + sqrt(2) * rnorm(60)
points(x,y)
cat("mean,var:",c(mean(y),var(y)),"\n")
print(rsq.mse(x,y))
abline(lsfit(x,y),lty=4)
print(rsq.mse(cbind(x,x^2),y))
b <- lsfit(cbind(x,x^2),y)$coef
#lines(x,b[1]+b[2]*x+b[3]*x^2,lty=4)
xm <- cbind(x<=20,x>=21 & x<=40, x>=41)
print(rsq.mse(xm,y))
lines(c(1,20),rep(mean(y[x<=20]),2),lty=3)
lines(c(21,40),rep(mean(y[x>=21 & x<=40]),2),lty=3)
lines(c(41,60),rep(mean(y[x>=41]),2),lty=3)
xs <- rcspline.eval(x,knots=c(4,17,30.5,44,57),inclx=T)
print(rsq.mse(xs,y))
b <- lsfit(xs,y)$coef
lines(x,b[1]+ xs %*% b[2:5], lty=2)
legend(locator(1),c("True regression function","Spline fit",
"Linear fit","Stratified means"),
lty=c(1,2,4,3))