# Code for Shepherd and Liu's discussion of 'Regularized Regression for
# Categorical Data' by Tutz and Gertheiss (2016).
# This code builds on code kindly provided to us by Tutz and Gertheiss, but if
# there are any mistakes in these examples, they are our fault.
# We apply the methods of Tutz and Gertheiss to some simple scenarios with
# continuous Y, a single ordinal predictor X, and a continuous covariate Z.

rm(list=ls())
library(lars)
library(ordPens)
if (dev.cur() > 1) dev.off()  # close any graphics device left open

#### Function for simulating data
generate.data <- function(alphax, betax, beta0y, betay, eta, sigmay, N) {
  z <- rnorm(N, 0, 1)
  x <- y <- numeric(N)
  ## px is a length(alphax) x N matrix; each column holds the TRUE cumulative
  ## probabilities for one subject.
  px <- (1 + exp(-outer(alphax, betax*z, "+")))^(-1)
  aa <- runif(N)
  for (i in 1:N) x[i] <- sum(aa[i] > px[, i])
  x <- as.numeric(as.factor(x))  # recode to categories 1, ..., k
  e <- rnorm(N, 0, sigmay)
  y <- beta0y + eta[x] + betay*z + e
  return(list(x=x, y=y, z=z))
}

##### This function does the actual analyses
explore_plot_Z <- function(seed=1, N=500, eta=c(0, 2.4, 2.5, 2.6, 2.7),
                           alphax=c(-1, 0, 1, 2), betax=1, beta0y=-0.5,
                           betay=0.5, sigmay=1, K=5, cv.seed=1, show.cv=FALSE){
  set.seed(seed)
  d <- generate.data(alphax, betax, beta0y, betay, eta, sigmay, N)
  # split coding of the ordinal predictor x
  Xsplit <- coding(as.matrix(d$x - 1) + 1, constant=FALSE, splitcod=TRUE)
  # OLS fit used to build the adaptive (inverse) weights
  bols <- lm(d$y ~ Xsplit + d$z)$coef
  Winv <- abs(bols[-1])

  # estimation
  Xsplit <- cbind(Xsplit, d$z)
  meanx <- apply(Xsplit, 2, mean)
  Xcentered <- scale(Xsplit, center=meanx, scale=FALSE)
  meany <- mean(d$y)
  ycentered <- d$y - meany

  # Weights for unequal numbers in the different categories of X
  # (assumes all five categories of x are observed)
  tab.x <- table(d$x)
  w.count <- 1/c(sqrt(c(tab.x[1]+tab.x[2], tab.x[2]+tab.x[3],
                        tab.x[3]+tab.x[4], tab.x[4]+tab.x[5])/N), 1)
  Winv1 <- Winv * w.count
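  ## Multiplying column j of the design by w.count[j] = 1/sqrt((n_j+n_{j+1})/N)
  ## and rescaling the fitted paths by w.count afterwards (below) is equivalent
  ## to penalizing |beta_j| with weight sqrt((n_j+n_{j+1})/N), i.e. the
  ## category-size weights of Gertheiss and Tutz; the covariate z gets weight
  ## 1, and Winv1 combines these counts with the adaptive OLS weights.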
  # unweighted versions, kept for reference:
  # larsEst3 <- lars(x=Xcentered, y=ycentered, normalize=FALSE, intercept=FALSE)
  # larsEst4 <- lars(x=t(t(Xcentered)*Winv), y=ycentered, normalize=FALSE, intercept=FALSE)
  larsEst3 <- lars(x=t(t(Xcentered)*w.count), y=ycentered, normalize=FALSE, intercept=FALSE)
  larsEst4 <- lars(x=t(t(Xcentered)*Winv1), y=ycentered, normalize=FALSE, intercept=FALSE)

  # coefficient paths
  svalues <- seq(0.001, 1, length=200)
  # larsPaths3 <- predict(larsEst3, s=svalues, type="coef", mode="fraction")$coef
  # larsPaths4 <- predict(larsEst4, s=svalues, type="coef", mode="fraction")$coef
  # larsPaths4 <- t(t(larsPaths4)*Winv)
  larsPaths3 <- predict(larsEst3, s=svalues, type="coef", mode="fraction")$coef
  larsPaths3 <- t(t(larsPaths3)*w.count)
  larsPaths4 <- predict(larsEst4, s=svalues, type="coef", mode="fraction")$coef
  larsPaths4 <- t(t(larsPaths4)*Winv1)

  # intercept
  alphaPath3 <- meany - meanx %*% t(larsPaths3)
  alphaPath4 <- meany - meanx %*% t(larsPaths4)

  # as dummy coefficients (column 5 is z; cumsum recovers the dummies for x)
  larsPaths3 <- cbind(larsPaths3[, 5], t(apply(larsPaths3[, -5], 1, cumsum)))
  larsPaths4 <- cbind(larsPaths4[, 5], t(apply(larsPaths4[, -5], 1, cumsum)))

  ###### use cv.lars in the lars package to do K-fold cross-validation
  if (show.cv) {
    par(mfrow=c(2, 2))
  } else {
    par(mfrow=c(1, 2))
  }
  set.seed(cv.seed)  # the fold assignment below depends on the random seed
  # cv.larsEst3 <- cv.lars(index=svalues, x=Xcentered, y=ycentered, normalize=FALSE, intercept=FALSE, K=K, plot.it=show.cv)
  cv.larsEst3 <- cv.lars(index=svalues, x=t(t(Xcentered)*w.count), y=ycentered,
                         normalize=FALSE, intercept=FALSE, K=K, plot.it=show.cv)
  #### find s_min which gives the smallest MSE
  #### (this is sensitive to the random seed)
  cv.index.3 <- which(cv.larsEst3$cv == min(cv.larsEst3$cv))[1]
  #### one-standard-error rule: the smallest s whose MSE is within one
  #### standard error of the best MSE
  cv.s.3 <- svalues[which(cv.larsEst3$cv <= cv.larsEst3$cv[cv.index.3] +
                            cv.larsEst3$cv.error[cv.index.3])[1]]
  if (show.cv) {
    abline(v=svalues[cv.index.3], lty=3)  # dotted: min-MSE fraction
    abline(v=cv.s.3, lty=2)               # dashed: one-SE-rule fraction
  }

  ### same procedure for the adaptive lasso
  # cv.larsEst4 <- cv.lars(index=svalues, x=t(t(Xcentered)*Winv), y=ycentered, normalize=FALSE, intercept=FALSE, K=K, plot.it=show.cv)
  cv.larsEst4 <- cv.lars(index=svalues, x=t(t(Xcentered)*Winv1), y=ycentered,
                         normalize=FALSE, intercept=FALSE, K=K, plot.it=show.cv)
  cv.index.4 <- which(cv.larsEst4$cv == min(cv.larsEst4$cv))[1]
  cv.s.4 <- svalues[which(cv.larsEst4$cv <= cv.larsEst4$cv[cv.index.4] +
                            cv.larsEst4$cv.error[cv.index.4])[1]]
  if (show.cv) {
    abline(v=svalues[cv.index.4], lty=3)
    abline(v=cv.s.4, lty=2)
  }

  # coefficient paths: standard lasso
  plot(svalues, alphaPath3, type="l", lty=2, ylim=c(-1, 3.5), xlab="fraction",
       ylab="coefficient", xlim=c(0, 1.05), xaxp=c(0, 1, 5))
  for (i in 1:ncol(larsPaths3)) {
    lines(svalues, larsPaths3[, i])
  }
  title("standard")
  ypos <- larsPaths3[200, ]
  text(x=1.04, y=ypos, c("z", as.character(2:max(d$x))))
  abline(v=cv.s.3, lty=2)
  abline(v=svalues[cv.index.3], lty=3)

  # coefficient paths: adaptive lasso
  plot(svalues, alphaPath4, type="l", lty=2, ylim=c(-1, 3.5), xlab="fraction",
       ylab="coefficient", xlim=c(0, 1.05), xaxp=c(0, 1, 5))
  for (i in 1:ncol(larsPaths4)) {
    lines(svalues, larsPaths4[, i])
  }
  title("adaptive")
  ypos <- larsPaths4[200, ]
  text(x=1.04, y=ypos, c("z", as.character(2:max(d$x))))
  abline(v=cv.s.4, lty=2)
  abline(v=svalues[cv.index.4], lty=3)
}

### Monotone steps
eta <- c(0, 0, 1.5, 1.5, 3)
pdf("monotone-steps.pdf", height=3, width=5)
par(mar=c(3, 3, 1, 0.1), mgp=c(1.75, 0.5, 0), oma=c(0.5, 0.5, 1, 0.5))
explore_plot_Z(seed=20160321, cv.seed=3, eta=eta)
dev.off()

#### Monotone non-linear
eta <- c(0, 2.4, 2.5, 2.6, 2.7)
pdf("monotone-non-linear.pdf", height=3, width=5)
par(mar=c(3, 3, 1, 0.1), mgp=c(1.75, 0.5, 0), oma=c(0.5, 0.5, 1, 0.5))
explore_plot_Z(seed=20160321, eta=eta, show.cv=FALSE)
dev.off()

##### Linear
eta <- c(0, 0.5, 1, 1.5, 2)
pdf("linear.pdf", height=3, width=5)
par(mar=c(3, 3, 1, 0.1), mgp=c(1.75, 0.5, 0), oma=c(0.5, 0.5, 1, 0.5))
explore_plot_Z(seed=20160321, cv.seed=3, eta=eta)
dev.off()

#### Non-monotone
eta <- c(0, 0.75, 1.5, 0.75, 0)
pdf("non-monotone.pdf", height=3, width=5)
par(mar=c(3, 3, 1, 0.1), mgp=c(1.75, 0.5, 0), oma=c(0.5, 0.5, 1, 0.5))
explore_plot_Z(seed=20160321, cv.seed=3, eta=eta)
dev.off()

###############################################################################
## Now we apply the approach to continuous data
###############################################################################
rm(list=ls())

n <- 200
set.seed(20160325)
z <- rnorm(n, 0, 1)
y <- rnorm(n, 1 + 5*z, 1)
d <- list(z=z, y=y)
# treat the continuous z as ordinal: one category per distinct value
d$z_cat <- as.numeric(as.factor(d$z))

library(ordPens)
Zsplit <- coding(as.matrix(d$z_cat - 1) + 1, constant=FALSE, splitcod=TRUE)

# ordinary least squares estimate
bols <- lm(d$y ~ Zsplit)$coef
# (inverse) weights
Winv <- abs(bols[-1])

# estimation
meanz <- apply(Zsplit, 2, mean)
Zcentered <- scale(Zsplit, center=meanz, scale=FALSE)
meany <- mean(d$y)
ycentered <- d$y - meany

library(lars)
larsEst3 <- lars(x=Zcentered, y=ycentered, normalize=FALSE, intercept=FALSE)
larsEst4 <- lars(x=t(t(Zcentered)*Winv), y=ycentered, normalize=FALSE, intercept=FALSE)

# coefficient paths
svalues <- seq(0.001, 1, length=200)
larsPaths3 <- predict(larsEst3, s=svalues, type="coef", mode="fraction")$coef
larsPaths4 <- predict(larsEst4, s=svalues, type="coef", mode="fraction")$coef
larsPaths4 <- t(t(larsPaths4)*Winv)

# intercept
alphaPath3 <- meany - meanz %*% t(larsPaths3)
alphaPath4 <- meany - meanz %*% t(larsPaths4)

# as dummy coefficients
larsPaths3 <- t(apply(larsPaths3, 1, cumsum))
larsPaths4 <- t(apply(larsPaths4, 1, cumsum))

###### use cv.lars in the lars package to do 5-fold cross-validation
par(mfrow=c(1, 2))
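## Note: because z is continuous, as.factor(z) gives each of the n = 200
## subjects its own category, so Zcentered has n - 1 = 199 split-coding
## columns and the OLS fit supplying the adaptive weights is saturated.
## Cross-validation below chooses how strongly to fuse these 199
## adjacent-value differences.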
cv.larsEst3 <- cv.lars(index=svalues, x=Zcentered, y=ycentered,
                       normalize=FALSE, intercept=FALSE, K=5)
cv.index.3 <- which(cv.larsEst3$cv == min(cv.larsEst3$cv))[1]
cv.s.3 <- svalues[which(cv.larsEst3$cv <= cv.larsEst3$cv[cv.index.3] +
                          cv.larsEst3$cv.error[cv.index.3])[1]]
abline(v=svalues[cv.index.3], lty=3)
abline(v=cv.s.3, lty=2)

### same procedure for the adaptive lasso
cv.larsEst4 <- cv.lars(index=svalues, x=t(t(Zcentered)*Winv), y=ycentered,
                       normalize=FALSE, intercept=FALSE, K=5)
cv.index.4 <- which(cv.larsEst4$cv == min(cv.larsEst4$cv))[1]
cv.s.4 <- svalues[which(cv.larsEst4$cv <= cv.larsEst4$cv[cv.index.4] +
                          cv.larsEst4$cv.error[cv.index.4])[1]]
abline(v=svalues[cv.index.4], lty=3)
abline(v=cv.s.4, lty=2)

pdf("continuous-linear-paths.pdf", height=3, width=5)
par(mar=c(3, 3, 1, 0.1), mgp=c(1.75, 0.5, 0), oma=c(0.5, 0.5, 1, 0.5))
par(mfrow=c(1, 2))
plot(svalues, alphaPath3, type="l", lty=2,
     ylim=c(min(larsPaths3), max(larsPaths3)), xlab="fraction",
     ylab="dummy coefficient", xlim=c(0, 1.05), xaxp=c(0, 1, 5))
for (i in 1:ncol(larsPaths3)) {
  lines(svalues, larsPaths3[, i])
}
title("standard")
ypos <- larsPaths3[200, ]
text(x=1.04, y=ypos, as.character(2:max(d$z_cat)), cex=0.6, col="grey")
abline(v=cv.s.3, lty=2)
abline(v=svalues[cv.index.3], lty=3)
plot(svalues, alphaPath4, type="l", lty=2,
     ylim=c(min(larsPaths4), max(larsPaths4)), xlab="fraction",
     ylab="dummy coefficient", xlim=c(0, 1.05), xaxp=c(0, 1, 5))
for (i in 1:ncol(larsPaths4)) {
  lines(svalues, larsPaths4[, i])
}
title("adaptive")
ypos <- larsPaths4[200, ]
text(x=1.04, y=ypos, as.character(2:max(d$z_cat)), cex=0.5, col="grey")
abline(v=cv.s.4, lty=2)
abline(v=svalues[cv.index.4], lty=3)
dev.off()

#### the path plot is too crowded to read with this many categories, so we
#### instead plot the coefficient for each category of Z at the "fraction"
#### chosen by cross-validation
pdf("continuous-linear-chosen.pdf", height=3, width=5)
par(mar=c(3, 3, 1, 0.1), mgp=c(1.75, 0.5, 0), oma=c(0.5, 0.5, 1, 0.5))
par(mfrow=c(1, 2))
order.z <- d$z[order(d$z)]
coeff.z.cv.1 <- c(alphaPath3[cv.index.3], larsPaths3[cv.index.3, ])
coeff.z.cv.2 <- c(alphaPath3[which(svalues == cv.s.3)[1]],
                  larsPaths3[which(svalues == cv.s.3)[1], ])
plot(order.z[-1], coeff.z.cv.1[-1], cex=0.2, xlab="z",
     ylab="dummy coefficient", pch=1)
points(order.z[-1], coeff.z.cv.2[-1], cex=0.2, col=2, pch=2)
legend("topleft", legend=c("min MSE", "one sd rule"), col=c(1, 2),
       pch=c(1, 2), bty="n", cex=0.7)
title("standard")
coeff.z.cv.3 <- c(alphaPath4[cv.index.4], larsPaths4[cv.index.4, ])
coeff.z.cv.4 <- c(alphaPath4[which(svalues == cv.s.4)[1]],
                  larsPaths4[which(svalues == cv.s.4)[1], ])
plot(order.z[-1], coeff.z.cv.3[-1], cex=0.2, xlab="z",
     ylab="dummy coefficient", pch=1)
points(order.z[-1], coeff.z.cv.4[-1], cex=0.2, col=2, pch=2)
legend("topleft", legend=c("min MSE", "one sd rule"), col=c(1, 2),
       pch=c(1, 2), bty="n", cex=0.7)
title("adaptive")
dev.off()

# number of distinct coefficient values (i.e., fused categories) chosen by
# each rule
length(unique(coeff.z.cv.1[-1]))
length(unique(coeff.z.cv.2[-1]))
length(unique(coeff.z.cv.3[-1]))
length(unique(coeff.z.cv.4[-1]))

##### wrap the above as a function
explore_continuous_z <- function(y, z){
  d <- list(z=z, y=y)
  d$z_cat <- as.numeric(as.factor(d$z))
  Zsplit <- coding(as.matrix(d$z_cat - 1) + 1, constant=FALSE, splitcod=TRUE)
  # ordinary least squares estimate
  bols <- lm(d$y ~ Zsplit)$coef
  # (inverse) weights
  Winv <- abs(bols[-1])
  # estimation
  meanz <- apply(Zsplit, 2, mean)
  Zcentered <- scale(Zsplit, center=meanz, scale=FALSE)
  meany <- mean(d$y)
  ycentered <- d$y - meany
  larsEst3 <- lars(x=Zcentered, y=ycentered, normalize=FALSE, intercept=FALSE)
  larsEst4 <- lars(x=t(t(Zcentered)*Winv), y=ycentered, normalize=FALSE, intercept=FALSE)
  # coefficient paths
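  ## s indexes the lars path as an L1-norm fraction (mode="fraction"): s = 1
  ## is the unpenalized fit, and s near 0 is almost complete shrinkage.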
  svalues <- seq(0.001, 1, length=200)
  larsPaths3 <- predict(larsEst3, s=svalues, type="coef", mode="fraction")$coef
  larsPaths4 <- predict(larsEst4, s=svalues, type="coef", mode="fraction")$coef
  larsPaths4 <- t(t(larsPaths4)*Winv)
  # intercept
  alphaPath3 <- meany - meanz %*% t(larsPaths3)
  alphaPath4 <- meany - meanz %*% t(larsPaths4)
  # as dummy coefficients
  larsPaths3 <- t(apply(larsPaths3, 1, cumsum))
  larsPaths4 <- t(apply(larsPaths4, 1, cumsum))
  # 5-fold cross-validation, standard lasso
  cv.larsEst3 <- cv.lars(index=svalues, x=Zcentered, y=ycentered,
                         normalize=FALSE, intercept=FALSE, K=5, plot.it=FALSE)
  cv.index.3 <- which(cv.larsEst3$cv == min(cv.larsEst3$cv))[1]
  cv.s.3 <- svalues[which(cv.larsEst3$cv <= cv.larsEst3$cv[cv.index.3] +
                            cv.larsEst3$cv.error[cv.index.3])[1]]
  ### same procedure for the adaptive lasso
  cv.larsEst4 <- cv.lars(index=svalues, x=t(t(Zcentered)*Winv), y=ycentered,
                         normalize=FALSE, intercept=FALSE, K=5, plot.it=FALSE)
  cv.index.4 <- which(cv.larsEst4$cv == min(cv.larsEst4$cv))[1]
  cv.s.4 <- svalues[which(cv.larsEst4$cv <= cv.larsEst4$cv[cv.index.4] +
                            cv.larsEst4$cv.error[cv.index.4])[1]]
  par(mfrow=c(1, 2))
  order.z <- d$z[order(d$z)]
  coeff.z.cv.1 <- c(alphaPath3[cv.index.3], larsPaths3[cv.index.3, ])
  coeff.z.cv.2 <- c(alphaPath3[which(svalues == cv.s.3)[1]],
                    larsPaths3[which(svalues == cv.s.3)[1], ])
  plot(c(order.z[-1], order.z[-1]), c(coeff.z.cv.1[-1], coeff.z.cv.2[-1]),
       type="n", xlab="z", ylab="dummy coefficient")
  points(order.z[-1], coeff.z.cv.1[-1], cex=0.2, col=1, pch=1)
  points(order.z[-1], coeff.z.cv.2[-1], cex=0.2, col=2, pch=2)
  legend("topleft", legend=c("min MSE", "one sd rule"), col=c(1, 2),
         pch=c(1, 2), bty="n", cex=0.7)
  title("standard")
  coeff.z.cv.3 <- c(alphaPath4[cv.index.4], larsPaths4[cv.index.4, ])
  coeff.z.cv.4 <- c(alphaPath4[which(svalues == cv.s.4)[1]],
                    larsPaths4[which(svalues == cv.s.4)[1], ])
  plot(c(order.z[-1], order.z[-1]), c(coeff.z.cv.3[-1], coeff.z.cv.4[-1]),
       type="n", xlab="z", ylab="dummy coefficient")
  points(order.z[-1], coeff.z.cv.3[-1], cex=0.2, col=1, pch=1)
  points(order.z[-1], coeff.z.cv.4[-1], cex=0.2, col=2, pch=2)
  legend("topleft", legend=c("min MSE", "one sd rule"), col=c(1, 2),
         pch=c(1, 2), bty="n", cex=0.7)
  title("adaptive")
}

####### cubic effect
n <- 200
pdf("continuous-cubic-chosen.pdf", height=3, width=5)
par(mar=c(3, 3, 1, 0.1), mgp=c(1.75, 0.5, 0), oma=c(0.5, 0.5, 1, 0.5))
set.seed(20160325)
z <- rnorm(n, 0, 1)
y <- rnorm(n, 1 + z^3, 1)
explore_continuous_z(y=y, z=z)
dev.off()

#### Others that didn't make it into our discussion:

#### linear effect
set.seed(20160325)
z <- rnorm(n, 0, 1)
y <- rnorm(n, 1 + 5*z, 1)
explore_continuous_z(y=y, z=z)

#### linear effect, but weaker association
y <- rnorm(n, 1 + 1*z, 1)
explore_continuous_z(y=y, z=z)

#### even weaker
y <- rnorm(n, 1 + 0.5*z, 1)
explore_continuous_z(y=y, z=z)

#### quadratic effect
y <- rnorm(n, 1 + z^2, 1)
explore_continuous_z(y=y, z=z)

y <- rnorm(n, 1 + 0.5*z^2, 1)
explore_continuous_z(y=y, z=z)
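###############################################################################
## For reference: split coding represents an ordinal predictor with categories
## 1, ..., k by the k-1 dummies d_j = 1(x >= j+1), so x = 1 maps to (0, ..., 0)
## and x = k to (1, ..., 1). The lasso on this coding penalizes differences
## between adjacent dummy coefficients, which is why cumsum() of the penalized
## split-coding coefficients recovers the dummy coefficients plotted above.
## A quick sanity check, assuming ordPens' coding() behaves as it is called
## throughout this script:
# coding(as.matrix(c(1, 2, 3)), constant=FALSE, splitcod=TRUE)
# ## expected rows: (0, 0), (1, 0), (1, 1)
###############################################################################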