Miscellaneous Examples of S Code for Spline Fits

Note: These examples assume that the Hmisc and Design packages are attached.

Linear Spline Basis Functions and Variance Inflation Factors

par(mfrow=c(3,2))
a <- c(5,50,95)
a <- c(10,50,90)
 cat("knot=",a,"\n")
 x <- 1:100
 set.seed(1)
 y <- matxv(lsp(x,c(20,40,70)),c(.2,-.4,.3,-.5))+2*rnorm(100)
 describe(y)

doit <- function(w, center=F) {
 if(center)note("centered") else note("uncentered")
 if(center) {
  wbar <- apply(w,2,mean)
  w <- sweep(w,2,wbar)
  }
 w <- cbind(1,w)
 xx <- t(w) %*% w
 d <- diag(xx)^.5
 cm <- xx/(d %o% d)
 print(cm)
 vif <- diag(solve(cm))
 print(vif)
 f <- lsfit(w,y,intercept=F)
 ls.print(f)
 matxv(w,f$coef)
}
fitted <- NULL
w <- lsp(x,a)
note("default lspline")
fitted <- cbind(fitted,doit(w,F))
fitted <- cbind(fitted,doit(w,T))
w <- cbind(pmin(x,a[1]),
           pmin(pmax(x-a[1],0),a[2]-a[1]),
           pmin(pmax(x-a[2],0),a[3]-a[2]),
           pmax(x-a[3],0))
note("double truncation")
fitted <- cbind(fitted,doit(w,F))
fitted <- cbind(fitted, doit(w,T))
print(fitted)

do <- function(i,j)max(abs(fitted[,i]-fitted[,j]))
dif <- max(do(1,2),do(1,3),do(1,4),do(2,3),do(2,4),do(3,4))
cat("Maximum abs difference:",dif,"\n")

Fit Complex Function, Impact of Randomizing Knot Locations

n<-500
x <- rnorm(n)
logit <- x-1.5*sin(x)+.7*abs(x)+exp(-x)/10
y <- ifelse(runif(n)<=1./(1.+exp(-logit)),1,0)
print(c("n=",n," d=",sum(y)),quote=F)
par(err=-1)
rcspline.plot(x,y,plotcl=F)
points(x,logit)
#smfit<-lowess(x,y,f=.50)
#points(smfit$x,log(smfit$y/(1-smfit$y)))
lower <- quantile(x,.05)
upper <- quantile(x,.95)
for(i in 1:50){
   knots<-sort(c(lower,upper,rnorm(3)))
   print(round(as.single(knots),2))
   rcspline.plot(x,y,knots=knots,addplot=T,plotcl=F,showknots=F)

Effect of Varying Number of Knots in Ability to Fit a Complex Function using Logistic Regression

dologit<-function(x, logit, y, nk = 5, show = "xbeta", xlim = range(x), supsmu=T,
                  add=F,lty=1) {
 if(show == "xbeta") {
  if(!add)plot(x, logit, type="n",xlab="X",ylab="log Odds")
  if(!add)lines(x, logit, lwd = 3)
  if(supsmu)lines(logitl(supsmu(x, y)), lty = 2)
  } else {
  if(!add)plot(x, p, type = "l", lwd = 3)
  if(supsmu)lines(supsmu(x, y), lty = 2)
  }
 rcspline.plot(x, y, nk = nk, show = show, add = T, xrange = xlim,
               plotcl=F,showknots=F,lty=lty)
}
set.seed(13)
n <- 1500
x <- runif(n)
logit <- 2/(1+(8*abs(x-.45))^1.8)-1
x <- x[!(is.na(x) | is.na(logit))]
logit <- logit[!(is.na(x) | is.na(logit))]
xx <- order(x, x)
x <- x[xx]
logit <- logit[xx]
p <- 1/(1 + exp( - logit))
y <- ifelse(runif(length(x)) <= p, 1, 0)
oldpar<-par(mar=c(6,4,4,2)+.1)

for(i in 3:8) {
 dologit(x,logit,y,nk=i,supsmu=F,add=i>3,lty=i-2)
 cat("Place marks for curve for",i,"knots with mouse\n")
 text(locator(),i,cex=.5)
}
title(paste("Figure 1: Effect of k for n=1500, d=",sum(y),sep=""))
title(sub="Numbers are values of k.\nBold curve is true function.",adj=0)
par(oldpar)

Effect of Varying Number of Knots in Ability to Fit a Complex Function using Logistic Regression, Example 2

dologit<-function(x, logit, y, nk = 5, show = "xbeta", xlim = range(x), supsmu=F,
                  add=F,lty=1,knots) {
 if(show == "xbeta") {
  if(!add)plot(x, logit, type="n",xlab="",ylab="", ylim=c(-1.5,2))
  if(!add)lines(x, logit, lwd = 3)
  if(supsmu)lines(logitl(supsmu(x, y)), lty = 2)
  } else {
  if(!add)plot(x, p, type = "l", lwd = 3)
  if(supsmu)lines(supsmu(x, y), lty = 2)
  }
 rcspline.plot(x, y, nk = nk, show = show, add = T, xrange = xlim,
               plotcl=F,showknots=T,lty=lty,knots=knots)
}
set.seed(23)
n <- 500
x <- runif(n)
logit <- 2/(1+(8*abs(x-.45))^1.8)-1
x <- x[!(is.na(x) | is.na(logit))]
logit <- logit[!(is.na(x) | is.na(logit))]
xx <- order(x, x)
x <- x[xx]
logit <- logit[xx]
p <- 1/(1 + exp( - logit))
y <- ifelse(runif(length(x)) <= p, 1, 0)
cat("d=",sum(y))
oldpar<-par(mfrow=c(4,4),oma=c(0,0,4,0))
set.seed(517)
figure <- 3
for(i in 1:16)   {
 if(figure==2)knots<-sort(runif(5))
 else knots<-c(runif(1,.01,.1625),runif(1,.1625,.3875),
               runif(1,.3875,.6125),
               runif(1,.6125,.8375),runif(1,.8375,.99))
 print(knots)
 dologit(x,logit,y,nk=5,supsmu=F,add=F,knots=knots)
 }
if(figure==2)mtext(side=3,line=0,cex=1.5,outer=T,
                   paste("Figure 2: Effect of Knot Locations for n=500, d=",sum(y),
                         "\n5 Knots, Uniform(0,1)",sep=""))
if(figure==3)mtext(side=3,line=0,cex=1.5,outer=T,
                   paste("Figure 3: Effect of Knot Locations for n=500, d=",sum(y),
                         "\n5 Knots, Restricted Distribution",sep=""))

cat("click left mouse button after selecting print")
locator(1)
par(oldpar)

Spline Fits vs. Stratified Means

set.seed(1)
x <- seq(60)
true.reg <- x^2/300 - x/20 + 2
plot(x,true.reg,type="l",xlab="X",ylab="Y",ylim=c(0,14))
y <- true.reg + sqrt(2) * rnorm(60)
points(x,y)
cat("mean,var:",c(mean(y),var(y)),"\n")
print(rsq.mse(x,y))
abline(lsfit(x,y),lty=4)
print(rsq.mse(cbind(x,x^2),y))
b <- lsfit(cbind(x,x^2),y)$coef
#lines(x,b[1]+b[2]*x+b[3]*x^2,lty=4)
xm <- cbind(x<=20,x>=21 & x<=40, x>=41)
print(rsq.mse(xm,y))
lines(c(1,20),rep(mean(y[x<=20]),2),lty=3)
lines(c(21,40),rep(mean(y[x>=21 & x<=40]),2),lty=3)
lines(c(41,60),rep(mean(y[x>=41]),2),lty=3)
xs <- rcspline.eval(x,knots=c(4,17,30.5,44,57),inclx=T)
print(rsq.mse(xs,y))
b <- lsfit(xs,y)$coef
lines(x,b[1]+ xs %*% b[2:5], lty=2)
legend(locator(1),c("True regression function","Spline fit",
   "Linear fit","Stratified means"),
   lty=c(1,2,4,3))
Topic revision: r1 - 10 Jul 2004, FrankHarrell
 

This site is powered by FoswikiCopyright © 2013-2020 by the contributing authors. All material on this collaboration platform is the property of the contributing authors.
Ideas, requests, problems regarding Vanderbilt Biostatistics Wiki? Send feedback