############################################################################ ### Non-linear association ############################################################################ rm(list=ls()) #install.packages("rms") #install.packages("splines") library(rms) library(splines) ### Generating data with a weird non-linear relationship between x and y n<-100 x<-rnorm(n,0,1) y<-rnorm(n, 2+sin(x)+.2*x^2, .2) plot(x,y) ### Fitting a linear model mod1<-lm(y ~ x) summary(mod1) abline(mod1$coeff) ### Fitting a quadratic model mod2<-lm(y~x+I(x^2)) ### Calculating the predicted value of y by hand from the quadratic model ### The specific numbers used below will be different when new data are generated #xnew<-c(-30:30)/10 #mod2 #ynew<-2.0457 + 0.6094*xnew + 0.1148*xnew^2 #lines(xnew,ynew, col=2) ### Calculating the predicted value of y from the quadratic model ### This code is reproducible (i.e., when new data are used, it will still give the right answer) xpreds<-x[order(x)] data.x<-data.frame(x=xpreds) preds2<-predict(mod2,newdata=data.x) lines(xpreds,preds2,col=3) ### Fitting a cubic model mod3<-lm(y ~ x+I(x^2)+I(x^3)) preds3<-predict(mod3,newdata=data.x) lines(xpreds,preds3,col=2, lwd=4) ### Fitting a model with natural splines and 3 degrees of freedom mod.ns3<-lm(y ~ ns(x,df=3)) preds.ns3<-predict(mod.ns3,newdata=data.x) lines(xpreds,preds.ns3,col=4) ### Fitting a model with natural splines and 4 degrees of freedom mod.ns4<-lm(y~ns(x,df=4)) preds.ns4<-predict(mod.ns4,newdata=data.x) lines(xpreds,preds.ns4,col=5) ### Fitting a model with natural splines and 3 degrees of freedom using a new function, ### the rcs() function in the rms package. The degrees of freedom (3 in this case), ### is one less than parms (4 in this case). mod.rcs4<-lm(y~rcs(x,parms=4)) preds.rcs4<-predict(mod.rcs4,newdata=data.x) lines(xpreds,preds.rcs4,col=6,lwd=3) ### Fitting a model with natural splines and 9 degrees of freedom. A restricted cubic ### spline (rcs) is the same as a natural spline. mod.rcs10<-lm(y~rcs(x,parms=10)) preds.rcs10<-predict(mod.rcs10,newdata=data.x) lines(xpreds,preds.rcs10,col=7,lwd=3) ### Now I'm plotting the true relationship between x and y to see how close we got. lines(xpreds, 2+sin(xpreds)+.2*xpreds^2, lwd=3) ### This calculates the AIC to examine model fit. The lower the value, the better. AIC(mod1) AIC(mod.ns3) AIC(mod.ns4) AIC(mod3) AIC(mod2) AIC(mod.rcs10) AIC(mod.rcs4) ### This tests for non-linearity in the cubic model by comparing it with the model that ### assumes linearity. anova(mod1,mod3) #### Interpretable quantities ## Calculating E(Y|X=2) - E(Y|X=1) based on the cubic model vals<-c(2,1) est3<-mod3$coefficients%*%c(1,vals[1],vals[1]^2,vals[1]^3)- mod3$coefficients%*%c(1,vals[2],vals[2]^2,vals[2]^3) est3 ### 95% CI for E(Y|X=2) - E(Y|X=1) based on the cubic model vcov(mod3) sqrt(diag(vcov(mod3))) summary(mod3) #effect.size= mod$coeff%*%values1 - mod$coeff%*%values2 # = mod$coeff%*%(values1-values2) #var(aX+bY) = var(X)*a^2+var(Y)*b^2+2*a*b*cov(X,Y) # matrix notation: var(X%*%a) = a%*%Var(X)%*%a a<-c(1,vals[1],vals[1]^2,vals[1]^3)-c(1,vals[2],vals[2]^2,vals[2]^3) var.est<-a%*%vcov(mod3)%*%a var.est<-as.vector(a%*%vcov(mod3)%*%a) est3<-as.vector(est3) est3+c(-1,1)*1.96*sqrt(var.est) ##### We did not do this part in class, but I have included it in case you are interested. #### Making rcs() identical to ns() knots<-quantile(x, c(0,.25,.5,.75,1)) mod.rcs5b<-lm(y~rcs(x,parms=knots)) AIC(mod.rcs5b) AIC(mod.ns4) #### Making ns() identical to rcs() knots2<-quantile(x, c(0.05, 0.275, 0.5, 0.725, 0.95)) mod.ns4b<-lm(y~ns(x,knots=knots2[2:4], Boundary.knots=knots2[c(1,5)])) AIC(mod.ns4b) AIC(mod.rcs5)