################# code for sims and applications in orm paper ################ library(rms) ################################### FUNCTIONS ################ #### function to estimate conditional mean and its standard error for orm model mean.orm <- function(mod, new.data, se=TRUE){ if(is.null(mod$yunique)) { stop("Need to set x=TRUE and y=TRUE for orm") } else{ order.y <- mod$yunique n.alpha <- length(order.y)-1 xb <- as.matrix(new.data)%*%matrix(coef(mod)[colnames(new.data)]) m.alpha <- mod$coef[1:n.alpha] lb <- t(outer(m.alpha, xb, "+")[,,1]) m.s <- mod$trans$cumprob(lb) m.f <- t(apply(m.s, 1, FUN=function(x) c(1,x[1:n.alpha]) - c(x[1:n.alpha], 0))) m.mean <- apply(m.f, 1, FUN=function(x) sum(x*order.y)) if(se){ if(mod$family=="logistic") mod$trans$deriv <- function(x) exp(-x)/(1+exp(-x))^2 dmean.dalpha <- t(apply(mod$trans$deriv(lb), 1, FUN=function(x) x*(order.y[2:length(order.y)] - order.y[1:n.alpha]))) dmean.dbeta <- apply(dmean.dalpha, 1, sum)*as.matrix(new.data) dmean.dtheta <- cbind(dmean.dalpha, dmean.dbeta) mean.var <-diag(dmean.dtheta%*%solve(mod$info.matrix)%*%t(dmean.dtheta)) mean.se <- sqrt(mean.var) result <- cbind(m.mean, mean.se) ci <- t(apply(result, 1, FUN=function(x) c(x[1]- qnorm(0.975)*x[2], x[1]+ qnorm(0.975)*x[2]))) result <- cbind(result, ci) colnames(result) <- c("est", "se", "lb", "ub") } else{ result <- matrix(m.mean) colnames(result) <- c("est") } return(result) } } #### function to estimate conditional quantiles and confidence intervals for orm model quantile.orm <- function(mod, new.data, probs=0.5, se=TRUE){ quantile <- matrix(NA, nrow=dim(new.data)[1], ncol=length(probs)) order.y <- mod$yunique #n.alpha <- length(order.y)-1 xb <- as.matrix(new.data)%*%matrix(coef(mod)[colnames(new.data)]) alpha <- mod$coef[1:(length(unique(order.y))-1)] lb <- t(outer(alpha, xb, "+")[,,1]) m.cdf <- 1- mod$trans$cumprob(lb) m.cdf <- cbind(0, m.cdf, 1) for(i in 1: length(probs)){ try({ index.1 <- apply(m.cdf, 1, FUN=function(x){ max(which(x<=probs[i]))[1]} ) index.2 <- apply(m.cdf, 1, FUN=function(x){ min(which(x>=probs[i]))[1]} ) index.y1 <- ifelse(index.1>length(order.y), Inf, order.y[index.1]) index.y2 <- ifelse(index.2>length(order.y),Inf,order.y[index.2]) index.y1.cdf <- ifelse(index.1==0, 0, m.cdf[cbind(1:dim(new.data)[1], index.1)]) index.y2.cdf <- ifelse(index.2>length(order.y), 1, m.cdf[cbind(1:dim(new.data)[1], index.2)]) quantile[,i] <- ifelse(index.1==index.2, index.y1, (index.y2-index.y1)/(index.y2.cdf - index.y1.cdf)*(probs[i]-index.y1.cdf) + index.y1) quantile[, i] <- ifelse(is.infinite(quantile[,i]), max(order.y), quantile[, i]) }) } result <- quantile if(se){ if(mod$family=="logistic") mod$trans$deriv <- function(x) exp(-x)/(1+exp(-x))^2 quantile.lb <- quantile.ub <- matrix(NA, nrow=dim(new.data)[1], ncol=length(probs)) lb.se <- matrix(NA, ncol=dim(lb)[2], nrow=dim(new.data)[1]) var <- as.matrix(solve(mod$info.matrix)) for(i in 1:dim(lb)[2]){ var.i <- var[c(i, which(names(coef(mod)) %in% colnames(new.data))), c(i, which(names(coef(mod)) %in% colnames(new.data)))] dcdf.dtheta <- cbind(-mod$trans$deriv(lb[,i]), -mod$trans$deriv(lb[,i])*as.matrix(new.data) ) dlb.dtheta <- as.matrix(cbind(1, new.data)) lb.se[,i] <- sqrt(diag(dlb.dtheta%*%var.i%*% t(dlb.dtheta))) } ci.lb <- sapply(1:dim(lb)[2], FUN=function(i) { 1- mod$trans$cumprob(lb[, i] +qnorm(0.975)*lb.se[, i])}) ci.ub <- sapply(1:dim(lb)[2], FUN=function(i) { 1- mod$trans$cumprob(lb[, i] -qnorm(0.975)*lb.se[, i])}) ci.lb <- matrix(ci.lb, nrow=dim(new.data)[1]) ci.ub <- matrix(ci.ub, nrow=dim(new.data)[1]) ci.lb <- cbind(0, ci.lb, 1) ci.ub <- cbind(0, ci.ub, 1) for(i in 1: length(probs)){ try({ index.1 <- apply(ci.lb, 1, FUN=function(x){ max(which(x<=probs[i]))[1]} ) index.2 <- apply(ci.lb, 1, FUN=function(x){ min(which(x>=probs[i]))[1]} ) index.y1 <- ifelse(index.1>length(order.y), Inf, order.y[index.1]) index.y2 <- ifelse(index.2>length(order.y),Inf,order.y[index.2]) index.y1.cdf <- ifelse(index.1==0, 0, ci.lb[cbind(1:dim(new.data)[1], index.1)]) index.y2.cdf <- ifelse(index.2>length(order.y), 1, ci.lb[cbind(1:dim(new.data)[1], index.2)]) quantile.lb[,i] <- ifelse(index.1==index.2, index.y1, (index.y2-index.y1)/(index.y2.cdf - index.y1.cdf)*(probs[i]-index.y1.cdf) + index.y1) quantile.lb[, i] <- ifelse(is.infinite(quantile.lb[,i]), max(order.y), quantile.lb[, i]) index.1 <- apply(ci.ub, 1, FUN=function(x){ max(which(x<=probs[i]))[1]} ) index.2 <- apply(ci.ub, 1, FUN=function(x){ min(which(x>=probs[i]))[1]} ) index.y1 <- ifelse(index.1>length(order.y), Inf, order.y[index.1]) index.y2 <- ifelse(index.2>length(order.y),Inf,order.y[index.2]) index.y1.cdf <- ifelse(index.1==0, 0, ci.ub[cbind(1:dim(new.data)[1], index.1)]) index.y2.cdf <- ifelse(index.2>length(order.y), 1, ci.ub[cbind(1:dim(new.data)[1], index.2)]) quantile.ub[,i] <- ifelse(index.1==index.2, index.y1, (index.y2-index.y1)/(index.y2.cdf - index.y1.cdf)*(probs[i]-index.y1.cdf) + index.y1) quantile.ub[, i] <- ifelse(is.infinite(quantile.ub[,i]), max(order.y), quantile.ub[, i]) }) } result <- list(quantile=quantile, lb=quantile.ub, ub=quantile.lb) } return(result) } #### function to estimate conditional CDF and its standard error for orm models cdf.orm <- function(mod, new.data, at.y=0,se=TRUE){ if(is.null(mod$yunique)) { stop("Need to set x=TRUE and y=TRUE for orm") } else{ order.y <- mod$yunique xb <- as.matrix(new.data)%*%matrix(coef(mod)[colnames(new.data)]) index <- sapply(at.y, FUN=function(x) {if(x= max(order.y)[1]) result <- -Inf else which(order.y>=x)[1]-1}) m.alpha <- mod$coef[index] m.alpha <- ifelse(is.infinite(index), index, m.alpha) if(length(at.y)==1){ lb <- as.matrix(outer(m.alpha, xb, "+")[,,1]) } else lb <- t(outer(m.alpha, xb, "+")[,,1]) m.cdf <- 1- mod$trans$cumprob(lb) if(se){ if(mod$family=="logistic") mod$trans$deriv <- function(x) exp(-x)/(1+exp(-x))^2 cdf.se <- matrix(NA, ncol=length(at.y), nrow=dim(new.data)[1]) lb.se <- matrix(NA, ncol=length(at.y), nrow=dim(new.data)[1]) var <- as.matrix(solve(mod$info.matrix)) for(i in 1:length(at.y)) { var.i <- var[c(index[i], which(names(coef(mod)) %in% colnames(new.data))), c(index[i], which(names(coef(mod)) %in% colnames(new.data)))] dcdf.dtheta <- cbind(-mod$trans$deriv(lb[,i]), -mod$trans$deriv(lb[,i])*as.matrix(new.data) ) dlb.dtheta <- as.matrix(cbind(1, new.data)) cdf.se[,i] <- sqrt(diag(dcdf.dtheta %*% var.i%*% t(dcdf.dtheta))) lb.se[, i] <- sqrt(diag(dlb.dtheta%*%var.i%*% t(dlb.dtheta))) } ci.lb <- sapply(1:length(at.y), FUN=function(i) { 1- mod$trans$cumprob(lb[, i] +qnorm(0.975)*lb.se[, i])}) ci.ub <- sapply(1:length(at.y), FUN=function(i) { 1- mod$trans$cumprob(lb[, i] -qnorm(0.975)*lb.se[, i])}) result <- list(est=m.cdf, se=cdf.se, lb=ci.lb, ub=ci.ub) } else{ result <- list(est=m.cdf) } return(result) } } ############################ SIMULATIONS ############## ###########Simulation 1: orm with properly specified link functions under #################### ################### i) normal error ################### generate.data.1 <- function(seed=1, n=200, p=0.5, alpha=0, beta=c(1, -0.5), sigma=1){ set.seed(seed) z1 <- sample(c(0,1), size=n, replace=TRUE, prob=c(1-p, p)) z2 <- rnorm(n, 0, 1) #y.star <- rnorm(n, alpha+beta*z, sigma) #y <- exp(y.star) log.y <- rnorm(n, alpha+beta[1]*z1 + beta[2]*z2, sigma) data <- data.frame(y=log.y, z1=z1, z2=z2) return(data) } ######### Sim 1 (i) regression coefficients ################ sim1_coeff.fun <- function(sim=100,seed=1, n=50, p=0.5, alpha=0, beta=c(1,-0.5), sigma=1, log.y=c(-1, -0.33, 0.5, 1.33, 2), se=TRUE){ set.seed(seed) seeds <- unique(round(runif(sim*10,0,10)*sim,0))[seq(sim)] alpha.y <- matrix(NA, ncol=length(log.y), nrow=sim) alpha.y.se <- matrix(NA, ncol=length(log.y), nrow=sim) beta.est <- matrix(NA, ncol=length(beta), nrow=sim) beta.se <- matrix(NA, ncol=length(beta), nrow=sim) for(i in 1:sim){ try({ data <- generate.data.1(seed=seeds[i], n=n,p=p, alpha=alpha, beta=beta, sigma=sigma) order.y <- data$y[order(data$y)] m.orm <- orm(y~z1+z2, data=data, family="probit") beta.est[i,] <- m.orm$coef[c(n, n+1)] beta.se[i, 1] <- sqrt(m.orm$var["z1", "z1"]) beta.se[i, 2] <- sqrt(m.orm$var["z2", "z2"]) alpha.y[i,] <- -sapply(log.y, FUN=function(x) {if(x<=min(data$y)) result <- Inf else if(x > max(data$y)) result <- -Inf else m.orm$coef[which(order.y>=x)[1]-1]}) if(se){ var <- as.matrix(solve(m.orm$info.matrix)) alpha.y.se[i,] <- sqrt(sapply(log.y, FUN=function(x) {if(x<=min(data$y)) result <- Inf else if(x > max(data$y)) result <- Inf else var[which(order.y>=x)[1]-1, which(order.y>=x)[1]-1]}) ) } }) } return(list(beta.est=beta.est, beta.se=beta.se, alpha.y=alpha.y, alpha.y.se=alpha.y.se)) } seed=1 p <- 0.5 alpha <- 0 beta <- c(1, -0.5) sigma <- 1 log.y <- c(-1, -0.33, 0.5, 1.33, 2) sim=10^4 #sim=100 sim1_coeff.n25 <- sim1_coeff.fun(n=25, sim=sim) sim1_coeff.n50 <- sim1_coeff.fun(n=50, sim=sim) sim1_coeff.n100 <- sim1_coeff.fun(n=100, sim=sim) sim1_coeff.n200 <- sim1_coeff.fun(n=200, sim=sim) sim1_coeff.n500 <- sim1_coeff.fun(n=500, sim=sim) sim1_coeff.n1000 <- sim1_coeff.fun(n=1000, sim=sim) sim1_coeff.summary <- function(result, beta=c(1, -0.5),alpha.y=c(-1, -0.33, 0.5, 1.33, 2)){ mean.est <- c(apply(result$beta.est, 2, FUN=function(x) mean(x, na.rm=TRUE)), apply(ifelse(is.infinite(result$alpha.y), NA, result$alpha.y), 2, FUN=function(x) mean(x, na.rm=TRUE))) mean.se <- c(apply(result$beta.se, 2, FUN=function(x) mean(x, na.rm=TRUE)), apply(ifelse(is.infinite(result$alpha.y.se), NA, result$alpha.y.se), 2, FUN=function(x) mean(x, na.rm=TRUE))) emp.se <- c(apply(result$beta.est, 2, FUN=function(x) sd(x, na.rm=TRUE)), apply(ifelse(is.infinite(result$alpha.y), NA, result$alpha.y), 2, FUN=function(x) sd(x, na.rm=TRUE))) mse <- c(apply((result$beta.est-matrix(beta, nrow=dim(result$beta.est)[1], ncol=length(beta),byrow=TRUE))^2, 2, FUN=function(x) mean(x, na.rm=TRUE)), apply((ifelse(is.infinite(result$alpha.y), NA, result$alpha.y)-matrix(alpha.y, nrow=dim(result$alpha.y)[1], ncol=length(alpha.y), byrow=TRUE))^2, 2 ,FUN=function(x) mean(x, na.rm=TRUE))) beta.lb <-result$beta.est+qnorm(0.025, 0, 1)*result$beta.se beta.ub <- result$beta.est-qnorm(0.025, 0, 1)*result$beta.se alpha.lb <- cbind(result$alpha.y+qnorm(0.025, 0, 1)*result$alpha.y.se) alpha.ub <- cbind(result$alpha.y-qnorm(0.025, 0, 1)*result$alpha.y.se) alpha.lb <- ifelse(is.infinite(alpha.lb), NA, alpha.lb) alpha.ub <- ifelse(is.infinite(alpha.ub), NA, alpha.ub) alpha.coverage <- rep(NA, length(alpha.y)) for(i in 1:length(alpha.y)){ alpha.coverage[i] <-mean(apply(cbind(alpha.lb[,i], alpha.ub[,i]), 1, FUN=function(x) alpha.y[i]>=x[1] & alpha.y[i]<=x[2]), na.rm=TRUE) } beta.coverage <- rep(NA, length(beta)) for(i in 1:length(beta)){ beta.coverage[i] <-mean(apply(cbind(beta.lb[,i], beta.ub[,i]), 1, FUN=function(x) beta[i]>=x[1] & beta[i]<=x[2]), na.rm=TRUE) } coverage <- c(beta.coverage, alpha.coverage) result <- cbind(format(c(beta, alpha.y), nsmall=1), format(round(mean.est, 2), nsmall=2), format(round(mean.se, 3) , nsmall=3), format(round(emp.se, 3) , nsmall=3), format(round(mse, 4) , nsmall=3), format(round(coverage, 3) , nsmall=3)) return(result) } ######## Table S.2; Figure 6 (i) error ~ Normal ################ sim1_coeff.probit.n25 <- sim1_coeff.summary(sim1_coeff.n25) sim1_coeff.probit.n50 <- sim1_coeff.summary(sim1_coeff.n50) sim1_coeff.probit.n100 <- sim1_coeff.summary(sim1_coeff.n100) sim1_coeff.probit.n200 <- sim1_coeff.summary(sim1_coeff.n200) sim1_coeff.probit.n500 <- sim1_coeff.summary(sim1_coeff.n500) sim1_coeff.probit.n1000 <- sim1_coeff.summary(sim1_coeff.n1000) ######### Sim 1 (i) regression coefficients compared with Box-Cox transformation ######### ################ box.cox.trans <- function(x,lambda){ if(lambda==0){ x <- log(x) }else{ x <- (x^lambda-1)/lambda } return(x) } generate.data.1 <- function(seed=1, n=200, p=0.5, alpha=0, beta=c(1, -0.5), sigma=1){ set.seed(seed) z1 <- sample(c(0,1), size=n, replace=TRUE, prob=c(1-p, p)) z2 <- rnorm(n, 0, 1) #y.star <- rnorm(n, alpha+beta*z, sigma) #y <- exp(y.star) log.y <- rnorm(n, alpha+beta[1]*z1 + beta[2]*z2, sigma) data <- data.frame(y=exp(log.y), z1=z1, z2=z2) return(data) } sim1_boxcox.fun <- function(sim=100,seed=1, n=50, p=0.5, alpha=0, beta=c(1, -0.5), sigma=1, log.y=c(-1, -0.33, 0.5, 1.33, 2), se=TRUE){ set.seed(seed) seeds <- unique(round(runif(sim*10,0,10)*sim,0))[seq(sim)] t.y <- matrix(NA, ncol=length(log.y), nrow=sim) beta.est <- matrix(NA, ncol=length(beta), nrow=sim) beta.se <- matrix(NA, ncol=length(beta), nrow=sim) beta.confint <- matrix(NA, ncol=4, nrow=sim) for(i in 1:sim){ try({ data <- generate.data.1(seed=seeds[i], n=n,p=p, alpha=alpha, beta=beta, sigma=sigma) b.m <- boxcox(y~z1+z2, data=data, plotit=F, interp = TRUE) lambda <- b.m$x[which(b.m$y==max(b.m$y))][1] data$y.trans <- box.cox.trans(data$y, lambda) mod <- ols(y.trans~z1+z2, data=data) beta.est[i, ] <- mod$coef[c("z1", "z2")] beta.se[i, 1] <- sqrt(mod$var["z1", "z1"]) beta.se[i, 2] <- sqrt(mod$var["z2", "z2"]) beta.confint[i,c(1,2)] <- confint(mod)["z1",] beta.confint[i,c(3,4)] <- confint(mod)["z2",] t.y[i,] <- box.cox.trans(exp(log.y), lambda) }) } return(list(beta.est=beta.est, beta.se=beta.se, beta.confint=beta.confint, t.y=t.y)) } seed=1 p <- 0.5 alpha <- 0 beta <- c(1,-0.5) sigma <- 1 log.y=c(-1, -0.33, 0.5, 1.33, 2) sim=10^4 sim1_boxcox.n25 <- sim1_boxcox.fun(n=25, sim=sim) sim1_boxcox.n50 <- sim1_boxcox.fun(n=50, sim=sim) sim1_boxcox.n100 <- sim1_boxcox.fun(n=100, sim=sim) sim1_boxcox.n200 <- sim1_boxcox.fun(n=200, sim=sim) sim1_boxcox.n500 <- sim1_boxcox.fun(n=500, sim=sim) sim1_boxcox.n1000 <- sim1_boxcox.fun(n=1000, sim=sim) ###### relative efficiency: orm vs. box-cox RE.summary <- function(result1, result2,beta=c(1, -0.5)){ MSE1 <- apply(result1$beta.est -matrix(beta, nrow=dim(result1$beta.est)[1], ncol=length(beta),byrow=TRUE), 2, FUN=function(x) mean(x^2, na.rm=TRUE)) MSE2 <- apply(result2$beta.est -matrix(beta, nrow=dim(result2$beta.est)[1], ncol=length(beta),byrow=TRUE), 2, FUN=function(x) mean(x^2, na.rm=TRUE)) re <- MSE2/MSE1 return(re) } sim1_beta.re.n25 <- RE.summary(sim1_coeff.n25, sim1_boxcox.n25) sim1_beta.re.n50 <- RE.summary(sim1_coeff.n50, sim1_boxcox.n50) sim1_beta.re.n100 <- RE.summary(sim1_coeff.n100, sim1_boxcox.n100) sim1_beta.re.n200 <- RE.summary(sim1_coeff.n200, sim1_boxcox.n200) sim1_beta.re.n500 <- RE.summary(sim1_coeff.n500, sim1_boxcox.n500) sim1_beta.re.n1000 <- RE.summary(sim1_coeff.n1000, sim1_boxcox.n1000) ##### Figure 7: relative efficiency (a) sim1_re.probit.beta.1 <- c(sim1_beta.re.n25[1], sim1_beta.re.n50[1], sim1_beta.re.n100[1], sim1_beta.re.n200[1], sim1_beta.re.n500[1], sim1_beta.re.n1000[1]) sim1_re.probit.beta.2 <- c(sim1_beta.re.n25[2], sim1_beta.re.n50[2], sim1_beta.re.n100[2], sim1_beta.re.n200[2], sim1_beta.re.n500[2], sim1_beta.re.n1000[2]) ####### Sim 1 (i) cdf and mean generate.data.1 <- function(seed=1, n=200, p=0.5, alpha=0, beta=c(1, -0.5), sigma=1){ set.seed(seed) z1 <- sample(c(0,1), size=n, replace=TRUE, prob=c(1-p, p)) z2 <- rnorm(n, 0, 1) #y.star <- rnorm(n, alpha+beta*z, sigma) #y <- exp(y.star) log.y <- rnorm(n, alpha+beta[1]*z1 + beta[2]*z2, sigma) data <- data.frame(y=exp(log.y), z1=z1, z2=z2) return(data) } sim1_cdfmean.fun <- function(sim=100,seed=1, n=50, p=0.5, alpha=0, beta=c(1, -0.5), sigma=1, log.y=c(-1, -0.33, 0.5, 1.33, 2), se=TRUE){ set.seed(seed) seeds <- unique(round(runif(sim*10,0,10)*sim,0))[seq(sim)] control.est <- matrix(NA, ncol=length(log.y)+1, nrow=sim) trt.est <- matrix(NA, ncol=length(log.y)+1, nrow=sim) control.est.1 <- matrix(NA, ncol=length(log.y)+1, nrow=sim) trt.est.1 <- matrix(NA, ncol=length(log.y)+1, nrow=sim) control.se <- matrix(NA, ncol=length(log.y)+1, nrow=sim) trt.se <- matrix(NA, ncol=length(log.y)+1, nrow=sim) control.se.1 <- matrix(NA, ncol=length(log.y)+1, nrow=sim) trt.se.1 <- matrix(NA, ncol=length(log.y)+1, nrow=sim) control.lb <- matrix(NA, ncol=length(log.y)+1, nrow=sim) control.ub <- matrix(NA, ncol=length(log.y)+1, nrow=sim) trt.lb <- matrix(NA, ncol=length(log.y)+1, nrow=sim) trt.ub <- matrix(NA, ncol=length(log.y)+1, nrow=sim) control.lb.1 <- matrix(NA, ncol=length(log.y)+1, nrow=sim) control.ub.1 <- matrix(NA, ncol=length(log.y)+1, nrow=sim) trt.lb.1 <- matrix(NA, ncol=length(log.y)+1, nrow=sim) trt.ub.1 <- matrix(NA, ncol=length(log.y)+1, nrow=sim) for(i in 1:sim){ try({ data <- generate.data.1(seed=seeds[i], n=n,p=p, alpha=alpha, beta=beta, sigma=sigma) #order.y <- data$y[order(data$y)] m.orm <- orm(y~z1+z2, data=data, family="probit", x=TRUE, y=TRUE) cdf.result <- cdf.orm(m.orm, new.data=data.frame(z1=c(0,1, 0, 1), z2=c(0, 0, 1, 1)), at.y=exp(log.y),se=se) mean.result <- data.frame(mean.orm(m.orm, new.data=data.frame(z1=c(0,1, 0, 1), z2=c(0, 0, 1, 1)), se=se)) control.est[i, ] <- c(mean.result$est[1], cdf.result$est[1,]) trt.est[i, ] <- c(mean.result$est[2], cdf.result$est[2,]) control.est.1[i, ] <- c(mean.result$est[3], cdf.result$est[3,]) trt.est.1[i, ] <- c(mean.result$est[4], cdf.result$est[4,]) if(se){ control.se[i, ] <- c(mean.result$se[1], cdf.result$se[1,]) trt.se[i, ] <- c(mean.result$se[2], cdf.result$se[2,]) control.se.1[i, ] <- c(mean.result$se[3], cdf.result$se[3,]) trt.se.1[i, ] <- c(mean.result$se[4], cdf.result$se[4,]) control.lb[i, ] <- c(mean.result$lb[1], cdf.result$lb[1,]) control.ub[i, ] <- c(mean.result$ub[1], cdf.result$ub[1,]) control.lb.1[i, ] <- c(mean.result$lb[3], cdf.result$lb[3,]) control.ub.1[i, ] <- c(mean.result$ub[3], cdf.result$ub[3,]) trt.lb[i, ] <- c(mean.result$lb[2], cdf.result$lb[2,]) trt.ub[i, ] <- c(mean.result$ub[2], cdf.result$ub[2,]) trt.lb.1[i, ] <- c(mean.result$lb[4], cdf.result$lb[4,]) trt.ub.1[i, ] <- c(mean.result$ub[4], cdf.result$ub[4,]) } }) } return(list(control.est=control.est, trt.est=trt.est, control.se=control.se, trt.se=trt.se, control.lb=control.lb, trt.lb=trt.lb, control.ub=control.ub, trt.ub=trt.ub, control.est.1=control.est.1, trt.est.1=trt.est.1, control.se.1=control.se.1, trt.se.1=trt.se.1, control.lb.1=control.lb.1, trt.lb.1=trt.lb.1, control.ub.1=control.ub.1, trt.ub.1=trt.ub.1)) } seed=1 p <- 0.5 alpha <- 0 beta <- c(1, -0.5) sigma <- 1 log.y=c(-1, -0.33, 0.5, 1.33, 2) sim=10^4 sim1_cdfmean.n25 <- sim1_cdfmean.fun(n=25, sim=sim) sim1_cdfmean.n50 <- sim1_cdfmean.fun(n=50, sim=sim) sim1_cdfmean.n100 <- sim1_cdfmean.fun(n=100, sim=sim) sim1_cdfmean.n200 <- sim1_cdfmean.fun(n=200, sim=sim) sim1_cdfmean.n500 <- sim1_cdfmean.fun(n=500, sim=sim) sim1_cdfmean.n1000 <- sim1_cdfmean.fun(n=1000, sim=sim) sim1_cdfmean.summary <- function(result, beta=c(1, -0.5),alpha=0,sigma=1,alpha.y=c(-1, -0.33, 0.5, 1.33, 2), family=c("probit", "loglog")){ if(family[1]=="probit"){ true.cdf.01 <- pnorm(log.y, alpha+c(0,1)%*%beta, sigma) true.cdf.11 <- pnorm(log.y, alpha+c(1,1)%*%beta) true.mean.01 <- exp(alpha+c(0,1)%*%beta+sigma^2/2) true.mean.11 <- exp(alpha+c(1,1)%*%beta+sigma^2/2) true.mean.00 <- exp(alpha+c(0,0)%*%beta+sigma^2/2) true.mean.10 <- exp(alpha+c(1,0)%*%beta+sigma^2/2) true.mean <- c(true.mean.00, true.mean.10, true.mean.01, true.mean.11) } else if (family[1]=="loglog"){ true.cdf.01 <- pGumbelMin(log.y, alpha+c(0,1)%*%beta, sigma) true.cdf.11 <- pGumbelMin(log.y, alpha+c(1,1)%*%beta, sigma) true.mean.01 <- mean(exp(rGumbelMin(10^6, alpha+c(0,1)%*%beta, sigma))) true.mean.11 <- mean(exp(rGumbelMin(10^6, alpha+c(1,1)%*%beta, sigma))) true.mean.00 <- mean(exp(rGumbelMin(10^6, alpha+c(0,0)%*%beta, sigma))) true.mean.10 <- mean(exp(rGumbelMin(10^6, alpha+c(1,0)%*%beta, sigma))) true.mean <- c(true.mean.00, true.mean.10, true.mean.01, true.mean.11) } mean.est <- c(mean(result$control.est[,1], na.rm=TRUE), mean(result$trt.est[,1], na.rm=TRUE), mean(result$control.est.1[,1], na.rm=TRUE), mean(result$trt.est.1[,1], na.rm=TRUE)) mean.emp.se <-c(sd(result$control.est[,1], na.rm=TRUE), sd(result$trt.est[,1], na.rm=TRUE), sd(result$control.est.1[,1], na.rm=TRUE), sd(result$trt.est.1[,1], na.rm=TRUE)) mean.est.se <- c(mean(result$control.se[,1], na.rm=TRUE), mean(result$trt.se[,1], na.rm=TRUE), mean(result$control.se.1[,1], na.rm=TRUE), mean(result$trt.se.1[,1], na.rm=TRUE)) mean.coverage <- c(mean(true.mean[1] >=result$control.lb[, 1] & result$control.ub[,1]>=true.mean[1], na.rm=TRUE), mean(true.mean[2] >=result$trt.lb[, 1] & result$trt.ub[,1]>=true.mean[2], na.rm=TRUE), mean(true.mean[3] >=result$control.lb.1[, 1] & result$control.ub.1[,1]>=true.mean[3], na.rm=TRUE), mean(true.mean[4] >=result$trt.lb.1[, 1] & result$trt.ub.1[,1]>=true.mean[4], na.rm=TRUE) ) cdf.01.est <- apply(result$control.est.1[,-1], 2, FUN=function(x) mean(x, na.rm=TRUE)) cdf.11.est <- apply(result$trt.est.1[,-1], 2, FUN=function(x) mean(x, na.rm=TRUE)) cdf.01.est.se <- apply(result$control.est.1[,-1], 2, FUN=function(x) sd(x, na.rm=TRUE)) cdf.11.est.se <- apply(result$trt.est.1[,-1], 2, FUN=function(x) sd(x, na.rm=TRUE)) cdf.01.emp.se <- apply(result$control.se.1[,-1], 2, FUN=function(x) mean(x, na.rm=TRUE)) cdf.11.emp.se <- apply(result$trt.se.1[,-1], 2, FUN=function(x) mean(x, na.rm=TRUE)) cdf.01.coverage <- sapply(1:(length(log.y)), FUN=function(i) mean( true.cdf.01[i]>=result$control.lb.1[,i+1]&true.cdf.01[i]<=result$control.ub.1[, i+1], na.rm=TRUE)) cdf.11.coverage <- sapply(1:(length(log.y)), FUN=function(i) mean( true.cdf.11[i]>=result$trt.lb.1[,i+1]&true.cdf.11[i]<=result$trt.ub.1[, i+1], na.rm=TRUE)) mean.out <- cbind(format(round(true.mean, 2), nsmall=2), format(round(mean.est, 3), nsmall=3), format(round(mean.est.se, 3), nsmall=3), format(round(mean.emp.se, 3), nsmall=3), format(round(mean.coverage, 3), nsmall=3)) rownames(mean.out) <- c("mean|00", "mean|10", "mean|01", "mean|11") cdf.01.out <- cbind(format(round(true.cdf.01, 4),4), format(round(cdf.01.est, 4),4), format(round(cdf.01.est.se, 4),4), format(round(cdf.01.emp.se, 4),4), format(round(cdf.01.coverage, 3),3) ) cdf.11.out <- cbind(format(round(true.cdf.11, 4),4), format(round(cdf.11.est, 4),4), format(round(cdf.11.est.se, 4),4), format(round(cdf.11.emp.se, 4),4), format(round(cdf.11.coverage, 3),3) ) result <- list(mean.out=mean.out, cdf.01.out=cdf.01.out, cdf.11.out=cdf.11.out) return(result) } sim1_cdfmean.probit.n25 <- sim1_cdfmean.summary(sim1_cdfmean.n25) sim1_cdfmean.probit.n50 <- sim1_cdfmean.summary(sim1_cdfmean.n50) sim1_cdfmean.probit.n100 <- sim1_cdfmean.summary(sim1_cdfmean.n100) sim1_cdfmean.probit.n200 <- sim1_cdfmean.summary(sim1_cdfmean.n200) sim1_cdfmean.probit.n500 <- sim1_cdfmean.summary(sim1_cdfmean.n500) sim1_cdfmean.probit.n1000 <- sim1_cdfmean.summary(sim1_cdfmean.n1000) ###### Figure 8 (i) error ~ Normal, Table S.3 (i) error ~ Normal sim1_cdf.probit.out <- rbind(rep("", 5), rbind(sim1_cdfmean.probit.n25$cdf.11.out), rep("", 5), rbind(sim1_cdfmean.probit.n50$cdf.11.out), rep("", 5), rbind(sim1_cdfmean.probit.n100$cdf.11.out), rep("", 5), rbind(sim1_cdfmean.probit.n200$cdf.11.out), rep("", 5), rbind(sim1_cdfmean.probit.n500$cdf.11.out), rep("", 5), rbind(sim1_cdfmean.probit.n1000$cdf.11.out)) ###### Figure S.1 (i) error ~ Normal, Table S.4 (i) error ~ Normal sim1_mean.probit.out <- rbind(rep("", 5), sim1_cdfmean.probit.n25$mean.out, rep("", 5), sim1_cdfmean.probit.n50$mean.out, rep("", 5), sim1_cdfmean.probit.n100$mean.out, rep("", 5), sim1_cdfmean.probit.n200$mean.out, rep("", 5), sim1_cdfmean.probit.n500$mean.out, rep("", 5), sim1_cdfmean.probit.n1000$mean.out) ########### Sim 1 (i) quantiles generate.data.1 <- function(seed=1, n=200, p=0.5, alpha=0, beta=c(1, -0.5), sigma=1){ set.seed(seed) z1 <- sample(c(0,1), size=n, replace=TRUE, prob=c(1-p, p)) z2 <- rnorm(n, 0, 1) #y.star <- rnorm(n, alpha+beta*z, sigma) #y <- exp(y.star) log.y <- rnorm(n, alpha+beta[1]*z1 + beta[2]*z2, sigma) data <- data.frame(y=exp(log.y), z1=z1, z2=z2) return(data) } sim1_quantile.fun <- function(sim=100,seed=1, n=50, p=0.5, alpha=0, beta=c(1, -0.5), sigma=1, probs=c(0.1, 0.25, 0.5, 0.75, 0.9), se=TRUE){ set.seed(seed) seeds <- unique(round(runif(sim*10,0,10)*sim,0))[seq(sim)] est.00 <- est.10 <- est.01<- est.11 <- matrix(NA, ncol=length(probs), nrow=sim) lb.00 <- lb.10 <- lb.01<- lb.11 <- matrix(NA, ncol=length(probs), nrow=sim) ub.00 <- ub.10 <- ub.01<- ub.11 <- matrix(NA, ncol=length(probs), nrow=sim) for(i in 1:sim){ try({ data <- generate.data.1(seed=seeds[i], n=n,p=p, alpha=alpha, beta=beta, sigma=sigma) m.orm <- orm(y~z1+z2, data=data, family="probit", x=TRUE, y=TRUE) result <- quantile.orm(mod=m.orm, new.data=data.frame(z1=c(0,1, 0, 1), z2=c(0, 0, 1, 1)), probs=probs, se=TRUE) est.00[i, ] <- result$quantile[1,] est.10[i, ] <- result$quantile[2,] est.01[i, ] <- result$quantile[3,] est.11[i, ] <- result$quantile[4,] lb.00[i, ] <- result$lb[1,] lb.10[i, ] <- result$lb[2,] lb.01[i, ] <- result$lb[3,] lb.11[i, ] <- result$lb[4,] ub.00[i, ] <- result$ub[1,] ub.10[i, ] <- result$ub[2,] ub.01[i, ] <- result$ub[3,] ub.11[i, ] <- result$ub[4,] }) } result <- list(result.00=list(est=est.00, lb=lb.00, ub=ub.00), result.10=list(est=est.10, lb=lb.10, ub=ub.10), result.01=list(est=est.01, lb=lb.01, ub=ub.01), result.11=list(est=est.11, lb=lb.11, ub=ub.11)) return(result) } seed=1 p <- 0.5 alpha <- 0 beta <- c(1, -0.5) sigma <- 1 probs=c(0.1, 0.25, 0.5, 0.75, 0.9) se=TRUE sim=10^4 sim1_quantile.n25 <- sim1_quantile.fun(n=25, sim=sim) sim1_quantile.n50 <- sim1_quantile.fun(n=50, sim=sim) sim1_quantile.n100 <- sim1_quantile.fun(n=100, sim=sim) sim1_quantile.n200 <- sim1_quantile.fun(n=200, sim=sim) sim1_quantile.n500 <- sim1_quantile.fun(n=500, sim=sim) sim1_quantile.n1000 <- sim1_quantile.fun(n=1000, sim=sim) sim1_quantile.summary <- function(result, true.quantile){ true <- true.quantile est <- apply(result$est, 2, FUN=function(x) mean(x, na.rm=TRUE)) emp.se <- apply(result$est, 2, FUN=function(x) sd(x, na.rm=TRUE)) mse <- apply(result$est -matrix(true.quantile, ncol=length(true.quantile), nrow=dim(result$est)[1], byrow=T ), 2, FUN=function(x) mean(x^2, na.rm=TRUE)) cover <- sapply(1:length(true.quantile), FUN=function(i) true.quantile[i]>=result$lb[,i] & true.quantile[i]<=result$ub[,i]) coverage <- apply(cover, 2, FUN=function(x) mean(x, na.rm=TRUE)) result <- cbind(format(round(true, 3), nsmall=3), format(round(est, 3), nsmall=3), format(round(emp.se, 4), nsmall=4), format(round(coverage, 3), nsmall=3) ) } new.data <- rbind(c(0,1), c(1, 1)) true.mean <- beta %*% new.data sim1_quantile.probit.n25 <- sim1_quantile.summary(result=sim1_quantile.n25$result.11, true.quantile=exp(qnorm(c(0.1, 0.25, 0.5, 0.75, 0.9), mean=true.mean[2], sd=1))) sim1_quantile.probit.n50 <- sim1_quantile.summary(result=sim1_quantile.n50$result.11, true.quantile=exp(qnorm(c(0.1, 0.25, 0.5, 0.75, 0.9), mean=true.mean[2], sd=1))) sim1_quantile.probit.n100 <- sim1_quantile.summary(result=sim1_quantile.n100$result.11, true.quantile=exp(qnorm(c(0.1, 0.25, 0.5, 0.75, 0.9), mean=true.mean[2], sd=1))) sim1_quantile.probit.n200 <- sim1_quantile.summary(result=sim1_quantile.n200$result.11, true.quantile=exp(qnorm(c(0.1, 0.25, 0.5, 0.75, 0.9), mean=true.mean[2], sd=1))) sim1_quantile.probit.n500 <- sim1_quantile.summary(result=sim1_quantile.n500$result.11, true.quantile=exp(qnorm(c(0.1, 0.25, 0.5, 0.75, 0.9), mean=true.mean[2], sd=1))) sim1_quantile.probit.n1000 <- sim1_quantile.summary(result=sim1_quantile.n1000$result.11, true.quantile=exp(qnorm(c(0.1, 0.25, 0.5, 0.75, 0.9), mean=true.mean[2], sd=1))) ###### Figure S.2 (i) error ~ Normal, Table S.5 (i) error ~ Normal sim1_quantile.probit.out <- rbind(rep("", 4), rbind(sim1_quantile.probit.n25), rep("", 4), rbind(sim1_quantile.probit.n50), rep("", 4), rbind(sim1_quantile.probit.n100), rep("", 4), rbind(sim1_quantile.probit.n200), rep("", 4), rbind(sim1_quantile.probit.n500), rep("", 4), rbind(sim1_quantile.probit.n1000) ) ##ii) extreme value error distribution ##### Sim 1 (ii) regression coefficients ########### rGumbelMin<- function(n, mu=0, sigma=1){ u <- runif(n, min=0, max=1) x <- mu + sigma*log(-log(1-u)) return(x) } dGumbelMin <- function(x, mu=0, sigma=1){ df <- 1/sigma *exp((x-mu)/sigma)*exp(-exp((x-mu)/sigma)) return(df) } pGumbelMin <- function(x, mu=0, sigma=1){ pf <- 1-exp(-exp((x-mu)/sigma)) return(pf) } qGumbelMin <- function(p, mu=0, sigma=1){ x <- mu + sigma*(-log(-log(1-p))) } generate.data.2 <- function(seed, n, p=0.5,alpha=0, beta=c(1, -0.5), sigma=1){ set.seed(seed) z1 <- sample(c(0,1), size=n, replace=TRUE, prob=c(1-p, p)) z2 <- rnorm(n, 0, 1) y <- rGumbelMin(n, mu=alpha+beta[1]*z1+beta[2]*z2, sigma=sigma) data <- data.frame(y, z1=z1, z2=z2) return(data) } sim2_coeff.fun <- function(sim=100,seed=1, n=50, p=0.5, alpha=0, beta=c(1,-0.5), sigma=1, log.y=c(-1, -0.33, 0.5, 1.33, 2), se=TRUE){ set.seed(seed) seeds <- unique(round(runif(sim*10,0,10)*sim,0))[seq(sim)] alpha.y <- matrix(NA, ncol=length(log.y), nrow=sim) alpha.y.se <- matrix(NA, ncol=length(log.y), nrow=sim) beta.est <- matrix(NA, ncol=length(beta), nrow=sim) beta.se <- matrix(NA, ncol=length(beta), nrow=sim) for(i in 1:sim){ try({ data <- generate.data.2(seed=seeds[i], n=n,p=p, alpha=alpha, beta=beta, sigma=sigma) order.y <- data$y[order(data$y)] m.orm <- orm(y~z1+z2, data=data, family=loglog) beta.est[i,] <- m.orm$coef[c(n, n+1)] beta.se[i, 1] <- sqrt(m.orm$var["z1", "z1"]) beta.se[i, 2] <- sqrt(m.orm$var["z2", "z2"]) alpha.y[i,] <- -sapply(log.y, FUN=function(x) {if(x<=min(data$y)) result <- Inf else if(x > max(data$y)) result <- -Inf else m.orm$coef[which(order.y>=x)[1]-1]}) if(se){ var <- as.matrix(solve(m.orm$info.matrix)) alpha.y.se[i,] <- sqrt(sapply(log.y, FUN=function(x) {if(x<=min(data$y)) result <- Inf else if(x > max(data$y)) result <- Inf else var[which(order.y>=x)[1]-1, which(order.y>=x)[1]-1]}) ) } }) } return(list(beta.est=beta.est, beta.se=beta.se, alpha.y=alpha.y, alpha.y.se=alpha.y.se)) } seed=1 p <- 0.5 alpha <- 0 beta <- c(1, -0.5) sigma <- 1 log.y=c(-1, -0.33, 0.5, 1.33, 2) sim=10^4 #sim=100 sim2_coeff.n25 <- sim2_coeff.fun(n=25, sim=sim) sim2_coeff.n50 <- sim2_coeff.fun(n=50, sim=sim) sim2_coeff.n100 <- sim2_coeff.fun(n=100, sim=sim) sim2_coeff.n200 <- sim2_coeff.fun(n=200, sim=sim) sim2_coeff.n500 <- sim2_coeff.fun(n=500, sim=sim) sim2_coeff.n1000 <- sim2_coeff.fun(n=1000, sim=sim) sim2_coeff.summary <- function(result, beta=c(1, -0.5),alpha.y=c(-1, -0.33, 0.5, 1.33, 2)){ mean.est <- c(apply(result$beta.est, 2, FUN=function(x) mean(x, na.rm=TRUE)), apply(ifelse(is.infinite(result$alpha.y), NA, result$alpha.y), 2, FUN=function(x) mean(x, na.rm=TRUE))) mean.se <- c(apply(result$beta.se, 2, FUN=function(x) mean(x, na.rm=TRUE)), apply(ifelse(is.infinite(result$alpha.y.se), NA, result$alpha.y.se), 2, FUN=function(x) mean(x, na.rm=TRUE))) emp.se <- c(apply(result$beta.est, 2, FUN=function(x) sd(x, na.rm=TRUE)), apply(ifelse(is.infinite(result$alpha.y), NA, result$alpha.y), 2, FUN=function(x) sd(x, na.rm=TRUE))) mse <- c(apply((result$beta.est-matrix(beta, nrow=dim(result$beta.est)[1], ncol=length(beta),byrow=TRUE))^2, 2, FUN=function(x) mean(x, na.rm=TRUE)), apply((ifelse(is.infinite(result$alpha.y), NA, result$alpha.y)-matrix(alpha.y, nrow=dim(result$alpha.y)[1], ncol=length(alpha.y), byrow=TRUE))^2, 2 ,FUN=function(x) mean(x, na.rm=TRUE))) beta.lb <-result$beta.est+qnorm(0.025, 0, 1)*result$beta.se beta.ub <- result$beta.est-qnorm(0.025, 0, 1)*result$beta.se alpha.lb <- cbind(result$alpha.y+qnorm(0.025, 0, 1)*result$alpha.y.se) alpha.ub <- cbind(result$alpha.y-qnorm(0.025, 0, 1)*result$alpha.y.se) alpha.lb <- ifelse(is.infinite(alpha.lb), NA, alpha.lb) alpha.ub <- ifelse(is.infinite(alpha.ub), NA, alpha.ub) alpha.coverage <- rep(NA, length(alpha.y)) for(i in 1:length(alpha.y)){ alpha.coverage[i] <-mean(apply(cbind(alpha.lb[,i], alpha.ub[,i]), 1, FUN=function(x) alpha.y[i]>=x[1] & alpha.y[i]<=x[2]), na.rm=TRUE) } beta.coverage <- rep(NA, length(beta)) for(i in 1:length(beta)){ beta.coverage[i] <-mean(apply(cbind(beta.lb[,i], beta.ub[,i]), 1, FUN=function(x) beta[i]>=x[1] & beta[i]<=x[2]), na.rm=TRUE) } coverage <- c(beta.coverage, alpha.coverage) result <- cbind(format(c(beta, alpha.y), nsmall=1), format(round(mean.est, 2), nsmall=2), format(round(mean.se, 3) , nsmall=3), format(round(emp.se, 3) , nsmall=3), format(round(mse, 4) , nsmall=3), format(round(coverage, 3) , nsmall=3)) return(result) } ######## Table S.2; Figure 6 (ii) error ~ extreme Type I ################ sim2_coeff.loglog.n25 <- sim2_coeff.summary(sim2_coeff.n25) sim2_coeff.loglog.n50 <- sim2_coeff.summary(sim2_coeff.n50) sim2_coeff.loglog.n100 <- sim2_coeff.summary(sim2_coeff.n100) sim2_coeff.loglog.n200 <- sim2_coeff.summary(sim2_coeff.n200) sim2_coeff.loglog.n500 <- sim2_coeff.summary(sim2_coeff.n500) sim2_coeff.loglog.n1000 <- sim2_coeff.summary(sim2_coeff.n1000) ######## sim 1 (ii) regression coefficients compared with Cox proportional hazards model sim2_cox.fun <- function(sim=100,seed=1, n=50, p=0.5, alpha=0, beta=c(1,-0.5), sigma=1, log.y=c(-1, -0.33, 0.5, 1.33, 2)){ set.seed(seed) seeds <- unique(round(runif(sim*10,0,10)*sim,0))[seq(sim)] alpha.y <- matrix(NA, ncol=length(log.y), nrow=sim) alpha.y.se <- matrix(NA, ncol=length(log.y), nrow=sim) beta.est <- matrix(NA, ncol=length(beta), nrow=sim) beta.se <- matrix(NA, ncol=length(beta), nrow=sim) for(i in 1:sim){ try({ data <- generate.data.2(seed=seeds[i], n=n,p=p, alpha=alpha, beta=beta, sigma=sigma) order.y <- data$y[order(data$y)] index <- sapply(log.y, FUN=function(x) {if(x<=min(order.y)[1]) result <- Inf else if(x > max(order.y)[1]) result <- -Inf else which(order.y>=x)[1]-1}) m.cox <- coxph(Surv(y, rep(1, n))~z1+z2, data=data) beta.est[i,] <- -m.cox$coef beta.se[i, 1] <- sqrt(m.cox$var[1, 1]) beta.se[i, 2] <- sqrt(m.cox$var[2, 2]) chz <- basehaz(m.cox, centered=FALSE)[,1] est.F <- 1-exp(-chz) alpha.y[i,] <- -qGumbelMin(est.F[index]) }) } return(list(beta.est=beta.est, beta.se=beta.se, alpha.y=alpha.y, alpha.y.se=alpha.y.se)) } seed=1 p <- 0.5 alpha <- 0 beta <- c(1, -0.5) sigma <- 1 log.y=c(-1, -0.33, 0.5, 1.33, 2) sim=10^4 sim2_cox.n25 <- sim2_cox.fun(n=25, sim=sim) sim2_cox.n50 <- sim2_cox.fun(n=50, sim=sim) sim2_cox.n100 <- sim2_cox.fun(n=100, sim=sim) sim2_cox.n200 <- sim2_cox.fun(n=200, sim=sim) sim2_cox.n500 <- sim2_cox.fun(n=500, sim=sim) sim2_cox.n1000 <- sim2_cox.fun(n=1000, sim=sim) RE.summary <- function(result1, result2,beta=c(1, -0.5)){ MSE1 <- apply(result1$beta.est -matrix(beta, nrow=dim(result1$beta.est)[1], ncol=length(beta),byrow=TRUE), 2, FUN=function(x) mean(x^2, na.rm=TRUE)) MSE2 <- apply(result2$beta.est -matrix(beta, nrow=dim(result2$beta.est)[1], ncol=length(beta),byrow=TRUE), 2, FUN=function(x) mean(x^2, na.rm=TRUE)) re <- MSE2/MSE1 return(re) } sim2_beta.re.n25 <- RE.summary(sim2_coeff.n25, sim2_cox.n25) sim2_beta.re.n50 <- RE.summary(sim2_coeff.n50, sim2_cox.n50) sim2_beta.re.n100 <- RE.summary(sim2_coeff.n100, sim2_cox.n100) sim2_beta.re.n200 <- RE.summary(sim2_coeff.n200, sim2_cox.n200) sim2_beta.re.n500 <- RE.summary(sim1_coeff.n500, sim2_cox.n500) sim2_beta.re.n1000 <- RE.summary(sim1_coeff.n1000, sim2_cox.n1000) ##### Figure 7: relative efficiency (b) sim2_re.loglog.beta.1 <- c(sim2_beta.re.n25[1], sim2_beta.re.n50[1], sim2_beta.re.n100[1], sim2_beta.re.n200[1], sim2_beta.re.n500[1], sim2_beta.re.n1000[1]) sim2_re.loglog.beta.2 <- c(sim2_beta.re.n25[2], sim2_beta.re.n50[2], sim2_beta.re.n100[2], sim2_beta.re.n200[2], sim2_beta.re.n500[2], sim2_beta.re.n1000[2]) ############ Sim 1 (ii): cdf and mean ########## generate.data.2 <- function(seed, n, p=0.5,alpha=0, beta=c(1, -0.5), sigma=1){ set.seed(seed) z1 <- sample(c(0,1), size=n, replace=TRUE, prob=c(1-p, p)) z2 <- rnorm(n, 0, 1) log.y <- rGumbelMin(n, mu=alpha+beta[1]*z1+beta[2]*z2, sigma=sigma) data <- data.frame(y=exp(log.y), z1=z1, z2=z2) return(data) } sim2_cdfmean.fun <- function(sim=100,seed=1, n=50, p=0.5, alpha=0, beta=c(1, -0.5), sigma=1, log.y=c(-1, -0.33, 0.5, 1.33, 2), se=TRUE){ set.seed(seed) seeds <- unique(round(runif(sim*10,0,10)*sim,0))[seq(sim)] control.est <- matrix(NA, ncol=length(log.y)+1, nrow=sim) trt.est <- matrix(NA, ncol=length(log.y)+1, nrow=sim) control.est.1 <- matrix(NA, ncol=length(log.y)+1, nrow=sim) trt.est.1 <- matrix(NA, ncol=length(log.y)+1, nrow=sim) control.se <- matrix(NA, ncol=length(log.y)+1, nrow=sim) trt.se <- matrix(NA, ncol=length(log.y)+1, nrow=sim) control.se.1 <- matrix(NA, ncol=length(log.y)+1, nrow=sim) trt.se.1 <- matrix(NA, ncol=length(log.y)+1, nrow=sim) control.lb <- matrix(NA, ncol=length(log.y)+1, nrow=sim) control.ub <- matrix(NA, ncol=length(log.y)+1, nrow=sim) trt.lb <- matrix(NA, ncol=length(log.y)+1, nrow=sim) trt.ub <- matrix(NA, ncol=length(log.y)+1, nrow=sim) control.lb.1 <- matrix(NA, ncol=length(log.y)+1, nrow=sim) control.ub.1 <- matrix(NA, ncol=length(log.y)+1, nrow=sim) trt.lb.1 <- matrix(NA, ncol=length(log.y)+1, nrow=sim) trt.ub.1 <- matrix(NA, ncol=length(log.y)+1, nrow=sim) for(i in 1:sim){ try({ data <- generate.data.2(seed=seeds[i], n=n,p=p, alpha=alpha, beta=beta, sigma=sigma) m.orm <- orm(y~z1+z2, data=data, family=loglog, x=TRUE, y=TRUE) cdf.result <- cdf.orm(m.orm, new.data=data.frame(z1=c( 0, 1), z2=c( 1, 1)), at.y=exp(log.y),se=se) mean.result <- data.frame(mean.orm(m.orm, new.data=data.frame(z1=c(0,1, 0, 1), z2=c(0, 0, 1, 1)), se=se)) control.est[i, ] <- c(mean.result$est[1]) trt.est[i, ] <- c(mean.result$est[2]) control.est.1[i, ] <- c(mean.result$est[3], cdf.result$est[1,]) trt.est.1[i, ] <- c(mean.result$est[4], cdf.result$est[2,]) if(se){ control.se[i, ] <- c(mean.result$se[1]) trt.se[i, ] <- c(mean.result$se[2]) control.se.1[i, ] <- c(mean.result$se[3], cdf.result$se[1,]) trt.se.1[i, ] <- c(mean.result$se[4], cdf.result$se[2,]) control.lb[i, ] <- c(mean.result$lb[1]) control.ub[i, ] <- c(mean.result$ub[1]) control.lb.1[i, ] <- c(mean.result$lb[3], cdf.result$lb[1,]) control.ub.1[i, ] <- c(mean.result$ub[3], cdf.result$ub[1,]) trt.lb[i, ] <- c(mean.result$lb[2]) trt.ub[i, ] <- c(mean.result$ub[2]) trt.lb.1[i, ] <- c(mean.result$lb[4], cdf.result$lb[2,]) trt.ub.1[i, ] <- c(mean.result$ub[4], cdf.result$ub[2,]) } }) } return(list(control.est=control.est, trt.est=trt.est, control.se=control.se, trt.se=trt.se, control.lb=control.lb, trt.lb=trt.lb, control.ub=control.ub, trt.ub=trt.ub, control.est.1=control.est.1, trt.est.1=trt.est.1, control.se.1=control.se.1, trt.se.1=trt.se.1, control.lb.1=control.lb.1, trt.lb.1=trt.lb.1, control.ub.1=control.ub.1, trt.ub.1=trt.ub.1)) } seed=1 p <- 0.5 alpha <- 0 beta <- c(1, -0.5) sigma <- 1 log.y=c(-1, -0.33, 0.5, 1.33, 2) sim=10^4 sim2_cdfmean.n25 <- sim2_cdfmean.fun(n=25, sim=sim) sim2_cdfmean.n50 <- sim2_cdfmean.fun(n=50, sim=sim) sim2_cdfmean.n100 <- sim2_cdfmean.fun(n=100, sim=sim) sim2_cdfmean.n200 <- sim2_cdfmean.fun(n=200, sim=sim) sim2_cdfmean.n500 <- sim2_cdfmean.fun(n=500, sim=sim) sim2_cdfmean.n1000 <- sim2_cdfmean.fun(n=1000, sim=sim) sim2_cdfmean.summary <- function(result, beta=c(1, -0.5),alpha=0,sigma=1,alpha.y=c(-1, -0.33, 0.5, 1.33, 2), family=c("probit", "loglog")){ if(family[1]=="probit"){ true.cdf.01 <- pnorm(log.y, alpha+c(0,1)%*%beta, sigma) true.cdf.11 <- pnorm(log.y, alpha+c(1,1)%*%beta) true.mean.01 <- exp(alpha+c(0,1)%*%beta+sigma^2/2) true.mean.11 <- exp(alpha+c(1,1)%*%beta+sigma^2/2) true.mean.00 <- exp(alpha+c(0,0)%*%beta+sigma^2/2) true.mean.10 <- exp(alpha+c(1,0)%*%beta+sigma^2/2) true.mean <- c(true.mean.00, true.mean.10, true.mean.01, true.mean.11) } else if (family[1]=="loglog"){ true.cdf.01 <- pGumbelMin(log.y, alpha+c(0,1)%*%beta, sigma) true.cdf.11 <- pGumbelMin(log.y, alpha+c(1,1)%*%beta, sigma) true.mean.01 <- mean(exp(rGumbelMin(10^6, alpha+c(0,1)%*%beta, sigma))) true.mean.11 <- mean(exp(rGumbelMin(10^6, alpha+c(1,1)%*%beta, sigma))) true.mean.00 <- mean(exp(rGumbelMin(10^6, alpha+c(0,0)%*%beta, sigma))) true.mean.10 <- mean(exp(rGumbelMin(10^6, alpha+c(1,0)%*%beta, sigma))) true.mean <- c(true.mean.00, true.mean.10, true.mean.01, true.mean.11) } mean.est <- c(mean(result$control.est[,1], na.rm=TRUE), mean(result$trt.est[,1], na.rm=TRUE), mean(result$control.est.1[,1], na.rm=TRUE), mean(result$trt.est.1[,1], na.rm=TRUE)) mean.emp.se <-c(sd(result$control.est[,1], na.rm=TRUE), sd(result$trt.est[,1], na.rm=TRUE), sd(result$control.est.1[,1], na.rm=TRUE), sd(result$trt.est.1[,1], na.rm=TRUE)) mean.est.se <- c(mean(result$control.se[,1], na.rm=TRUE), mean(result$trt.se[,1], na.rm=TRUE), mean(result$control.se.1[,1], na.rm=TRUE), mean(result$trt.se.1[,1], na.rm=TRUE)) mean.coverage <- c(mean(true.mean[1] >=result$control.lb[, 1] & result$control.ub[,1]>=true.mean[1], na.rm=TRUE), mean(true.mean[2] >=result$trt.lb[, 1] & result$trt.ub[,1]>=true.mean[2], na.rm=TRUE), mean(true.mean[3] >=result$control.lb.1[, 1] & result$control.ub.1[,1]>=true.mean[3], na.rm=TRUE), mean(true.mean[4] >=result$trt.lb.1[, 1] & result$trt.ub.1[,1]>=true.mean[4], na.rm=TRUE) ) cdf.01.est <- apply(result$control.est.1[,-1], 2, FUN=function(x) mean(x, na.rm=TRUE)) cdf.11.est <- apply(result$trt.est.1[,-1], 2, FUN=function(x) mean(x, na.rm=TRUE)) cdf.01.est.se <- apply(result$control.est.1[,-1], 2, FUN=function(x) sd(x, na.rm=TRUE)) cdf.11.est.se <- apply(result$trt.est.1[,-1], 2, FUN=function(x) sd(x, na.rm=TRUE)) cdf.01.emp.se <- apply(result$control.se.1[,-1], 2, FUN=function(x) mean(x, na.rm=TRUE)) cdf.11.emp.se <- apply(result$trt.se.1[,-1], 2, FUN=function(x) mean(x, na.rm=TRUE)) cdf.01.coverage <- sapply(1:(length(log.y)), FUN=function(i) mean( true.cdf.01[i]>=result$control.lb.1[,i+1]&true.cdf.01[i]<=result$control.ub.1[, i+1], na.rm=TRUE)) cdf.11.coverage <- sapply(1:(length(log.y)), FUN=function(i) mean( true.cdf.11[i]>=result$trt.lb.1[,i+1]&true.cdf.11[i]<=result$trt.ub.1[, i+1], na.rm=TRUE)) mean.out <- cbind(format(round(true.mean, 2), nsmall=2), format(round(mean.est, 3), nsmall=3), format(round(mean.est.se, 3), nsmall=3), format(round(mean.emp.se, 3), nsmall=3), format(round(mean.coverage, 3), nsmall=3)) rownames(mean.out) <- c("mean|00", "mean|10", "mean|01", "mean|11") cdf.01.out <- cbind(format(round(true.cdf.01, 4),4), format(round(cdf.01.est, 4),4), format(round(cdf.01.est.se, 4),4), format(round(cdf.01.emp.se, 4),4), format(round(cdf.01.coverage, 3),3) ) cdf.11.out <- cbind(format(round(true.cdf.11, 4),4), format(round(cdf.11.est, 4),4), format(round(cdf.11.est.se, 4),4), format(round(cdf.11.emp.se, 4),4), format(round(cdf.11.coverage, 3),3) ) result <- list(mean.out=mean.out, cdf.01.out=cdf.01.out, cdf.11.out=cdf.11.out) return(result) } sim2_cdfmean.loglog.n25 <- sim1_cdfmean.summary(sim2_cdfmean.n25) sim2_cdfmean.loglog.n50 <- sim1_cdfmean.summary(sim2_cdfmean.n50) sim2_cdfmean.loglog.n100 <- sim1_cdfmean.summary(sim2_cdfmean.n100) sim2_cdfmean.loglog.n200 <- sim1_cdfmean.summary(sim2_cdfmean.n200) sim2_cdfmean.loglog.n500 <- sim1_cdfmean.summary(sim2_cdfmean.n500) sim2_cdfmean.loglog.n1000 <- sim1_cdfmean.summary(sim2_cdfmean.n1000) ###### Figure 8 (ii) error ~ extreme value (type I), Table S.3 (ii) error ~ extreme value (type I) sim2_cdf.loglog.out <- rbind(rep("", 5), rbind(sim2_cdfmean.loglog.n25$cdf.11.out), rep("", 5), rbind(sim2_cdfmean.loglog.n50$cdf.11.out), rep("", 5), rbind(sim2_cdfmean.loglog.n100$cdf.11.out), rep("", 5), rbind(sim2_cdfmean.loglog.n200$cdf.11.out), rep("", 5), rbind(sim2_cdfmean.loglog.n500$cdf.11.out), rep("", 5), rbind(sim2_cdfmean.loglog.n1000$cdf.11.out)) ###### Figure S.1 (ii) error ~ extreme value (type I), Table S.4 (ii) error ~ extreme value (type I) sim2_mean.loglog.out <- rbind(rep("", 5), sim2_cdfmean.loglog.n25$mean.out, rep("", 5), sim2_cdfmean.loglog.n50$mean.out, rep("", 5), sim2_cdfmean.loglog.n100$mean.out, rep("", 5), sim2_cdfmean.loglog.n200$mean.out, rep("", 5), sim2_cdfmean.loglog.n500$mean.out, rep("", 5), sim2_cdfmean.loglog.n1000$mean.out) ############# Sim 1(ii): quantiles ############ generate.data.2 <- function(seed, n, p=0.5,alpha=0, beta=c(1, -0.5), sigma=1){ set.seed(seed) z1 <- sample(c(0,1), size=n, replace=TRUE, prob=c(1-p, p)) z2 <- rnorm(n, 0, 1) log.y <- rGumbelMin(n, mu=alpha+beta[1]*z1+beta[2]*z2, sigma=sigma) data <- data.frame(y=exp(log.y), z1=z1, z2=z2) return(data) } sim2_quantile.fun <- function(sim=100,seed=1, n=50, p=0.5, alpha=0, beta=c(1, -0.5), sigma=1, probs=c(0.1, 0.25, 0.5, 0.75, 0.9), se=TRUE){ set.seed(seed) seeds <- unique(round(runif(sim*10,0,10)*sim,0))[seq(sim)] est.00 <- est.10 <- est.01<- est.11 <- matrix(NA, ncol=length(probs), nrow=sim) lb.00 <- lb.10 <- lb.01<- lb.11 <- matrix(NA, ncol=length(probs), nrow=sim) ub.00 <- ub.10 <- ub.01<- ub.11 <- matrix(NA, ncol=length(probs), nrow=sim) for(i in 1:sim){ try({ data <- generate.data.2(seed=seeds[i], n=n,p=p, alpha=alpha, beta=beta, sigma=sigma) m.orm <- orm(y~z1+z2, data=data, family=loglog, x=TRUE, y=TRUE) result <- quantile.orm(mod=m.orm, new.data=data.frame(z1=c(0,1, 0, 1), z2=c(0, 0, 1, 1)), probs=probs, se=TRUE) est.00[i, ] <- result$quantile[1,] est.10[i, ] <- result$quantile[2,] est.01[i, ] <- result$quantile[3,] est.11[i, ] <- result$quantile[4,] lb.00[i, ] <- result$lb[1,] lb.10[i, ] <- result$lb[2,] lb.01[i, ] <- result$lb[3,] lb.11[i, ] <- result$lb[4,] ub.00[i, ] <- result$ub[1,] ub.10[i, ] <- result$ub[2,] ub.01[i, ] <- result$ub[3,] ub.11[i, ] <- result$ub[4,] }) } result <- list(result.00=list(est=est.00, lb=lb.00, ub=ub.00), result.10=list(est=est.10, lb=lb.10, ub=ub.10), result.01=list(est=est.01, lb=lb.01, ub=ub.01), result.11=list(est=est.11, lb=lb.11, ub=ub.11)) return(result) } seed=1 p <- 0.5 alpha <- 0 beta <- c(1, -0.5) sigma <- 1 probs=c(0.1, 0.25, 0.5, 0.75, 0.9) se=TRUE sim=10^4 #n=100 sim2_quantile.n25 <- sim2_quantile.fun(n=25, sim=sim) sim2_quantile.n50 <- sim2_quantile.fun(n=50, sim=sim) sim2_quantile.n100 <- sim2_quantile.fun(n=100, sim=sim) sim2_quantile.n200 <- sim2_quantile.fun(n=200, sim=sim) sim2_quantile.n500 <- sim2_quantile.fun(n=500, sim=sim) sim2_quantile.n1000 <- sim2_quantile.fun(n=1000, sim=sim) sim2_quantile.summary <- function(result, true.quantile){ true <- true.quantile est <- apply(result$est, 2, FUN=function(x) mean(x, na.rm=TRUE)) emp.se <- apply(result$est, 2, FUN=function(x) sd(x, na.rm=TRUE)) mse <- apply(result$est -matrix(true.quantile, ncol=length(true.quantile), nrow=dim(result$est)[1], byrow=T ), 2, FUN=function(x) mean(x^2, na.rm=TRUE)) cover <- sapply(1:length(true.quantile), FUN=function(i) true.quantile[i]>=result$lb[,i] & true.quantile[i]<=result$ub[,i]) coverage <- apply(cover, 2, FUN=function(x) mean(x, na.rm=TRUE)) result <- cbind(format(round(true, 3), nsmall=3), format(round(est, 3), nsmall=3), format(round(emp.se, 4), nsmall=4), format(round(coverage, 3), nsmall=3) ) } new.data <- rbind(c(0,1), c(1, 1)) true.mean <- beta %*% new.data qGumbelMin <- function(p, mean=0, sd=1){ x <- mu + sigma*(log(-log(1-p))) } sim2_quantile.loglog.n25 <- sim2_quantile.summary(result=sim2_quantile.n25.$result.11, true.quantile=exp(GumbelMin(c(0.1, 0.25, 0.5, 0.75, 0.9), mean=true.mean[2], sd=1))) sim2_quantile.loglog.n50 <- sim2_quantile.summary(result=sim2_quantile.n50$result.11, true.quantile=exp(GumbelMin(c(0.1, 0.25, 0.5, 0.75, 0.9), mean=true.mean[2], sd=1))) sim2_quantile.loglog.n100 <- sim2_quantile.summary(result=sim2_quantile.n100$result.11, true.quantile=exp(GumbelMin(c(0.1, 0.25, 0.5, 0.75, 0.9), mean=true.mean[2], sd=1))) sim2_quantile.loglog.n200 <- sim2_quantile.summary(result=sim2_quantile.n200$result.11, true.quantile=exp(GumbelMin(c(0.1, 0.25, 0.5, 0.75, 0.9), mean=true.mean[2], sd=1))) sim2_quantile.loglog.n500 <- sim2_quantile.summary(result=sim2_quantile.n500$result.11, true.quantile=exp(GumbelMin(c(0.1, 0.25, 0.5, 0.75, 0.9), mean=true.mean[2], sd=1))) sim2_quantile.loglog.n1000 <- sim2_quantile.summary(result=sim2_quantile.n1000.$result.11, true.quantile=exp(GumbelMin(c(0.1, 0.25, 0.5, 0.75, 0.9), mean=true.mean[2], sd=1))) ###### Figure S.2 (ii) error ~ extreme value (Type I), Table S.5 (ii) error ~ extreme value (Type I) sim2_quantile.loglog.out <- rbind(rep("", 4), rbind(sim2_quantile.loglog.n25), rep("", 4), rbind(sim2_quantile.loglog.n50), rep("", 4), rbind(sim2_quantile.loglog.n100), rep("", 4), rbind(sim2_quantile.loglog.n200), rep("", 4), rbind(sim2_quantile.loglog.n500), rep("", 4), rbind(sim2_quantile.loglog.n1000) ) ######################### sim 2: with misspecified link functions ############## rGumbelMin<- function(n, mu=0, sigma=1){ u <- runif(n, min=0, max=1) x <- mu + sigma*log(-log(1-u)) return(x) } rGumbelMax<- function(n, mu=0, sigma=1){ u <- runif(n, min=0, max=1) x <- mu + sigma*(-log(-log(u))) return(x) } generate.data.a <- function(seed=1, n=200, p=0.5, alpha=0, beta=c(1, -0.5)){ set.seed(seed) z1 <- sample(c(0,1), size=n, replace=TRUE, prob=c(1-p, p)) z2 <- rnorm(n, 0, 1) #y.star <- rnorm(n, alpha+beta*z, sigma) #y <- exp(y.star) y <- rnorm(n, alpha+beta[1]*z1 + beta[2]*z2, 1) data <- data.frame(y=y, z1=z1, z2=z2) return(data) } generate.data.b <- function(seed=1, n=200, p=0.5, alpha=0, beta=c(1, -0.5)){ set.seed(seed) z1 <- sample(c(0,1), size=n, replace=TRUE, prob=c(1-p, p)) z2 <- rnorm(n, 0, 1) #y.star <- rnorm(n, alpha+beta*z, sigma) #y <- exp(y.star) y <- rlogis(n, alpha+beta[1]*z1 + beta[2]*z2, 1) data <- data.frame(y=y, z1=z1, z2=z2) return(data) } generate.data.c <- function(seed, n, p=0.5,alpha=0, beta=c(1, -0.5)){ set.seed(seed) z1 <- sample(c(0,1), size=n, replace=TRUE, prob=c(1-p, p)) z2 <- rnorm(n, 0, 1) error <-rGumbelMin(n, mu=0, sigma=1)-digamma(1) y <- alpha+beta[1]*z1+beta[2]*z2 +error data <- data.frame(y, z1=z1, z2=z2) return(data) } generate.data.d <- function(seed=1, n=200, p=0.5, alpha=0, beta=c(1, -0.5)){ set.seed(seed) z1 <- sample(c(0,1), size=n, replace=TRUE, prob=c(1-p, p)) z2 <- rnorm(n, 0, 1) #error <- rgamma(n, 1, 1)-1 error <- rGumbelMax(n, 0, 1)+digamma(1) y <- alpha+beta[1]*z1+beta[2]*z2 +error data <- data.frame(y=y, z1=z1, z2=z2) return(data) } generate.data.e <- function(seed=1, n=200, p=0.5, alpha=0, beta=c(1, -0.5)){ set.seed(seed) z1 <- sample(c(0,1), size=n, replace=TRUE, prob=c(1-p, p)) z2 <- rnorm(n, 0, 1) #y.star <- rnorm(n, alpha+beta*z, sigma) #y <- exp(y.star) y <- alpha+beta[1]*z1 + beta[2]*z2 + rt(n, df=5) data <- data.frame(y=y, z1=z1, z2=z2) return(data) } generate.data.f <- function(seed=1, n=200, p=0.5, alpha=0, beta=c(1, -0.5)){ set.seed(seed) z1 <- sample(c(0,1), size=n, replace=TRUE, prob=c(1-p, p)) z2 <- rnorm(n, 0, 1) #y.star <- rnorm(n, alpha+beta*z, sigma) #y <- exp(y.star) y <- alpha+beta[1]*z1 + beta[2]*z2 + runif(n, -5, 5) data <- data.frame(y=y, z1=z1, z2=z2) return(data) } generate.data.g <- function(seed, n, p=0.5,alpha=0, beta=c(1, -0.5)){ set.seed(seed) z1 <- sample(c(0,1), size=n, replace=TRUE, prob=c(1-p, p)) z2 <- rnorm(n, 0, 1) error <-(rbeta(n, 5, 2) -5/(5+2))*sqrt((5+2)^2*(5+2+1)/5/2) y <- alpha+beta[1]*z1+beta[2]*z2 +error data <- data.frame(y, z1=z1, z2=z2) return(data) } generate.data.h <- function(seed=1, n=200, p=0.5, alpha=0, beta=c(1, -0.5)){ set.seed(seed) z1 <- sample(c(0,1), size=n, replace=TRUE, prob=c(1-p, p)) z2 <- rnorm(n, 0, 1) #error <- rgamma(n, 1, 1)-1 error <-(rbeta(n, 2, 5) -2/(5+2))*sqrt((5+2)^2*(5+2+1)/5/2) y <- alpha+beta[1]*z1+beta[2]*z2 +error data <- data.frame(y=y, z1=z1, z2=z2) return(data) } ##### Sim 2: means ########## sim3_mean.fun.a <- function(sim=100,seed=1, n=50, p=0.5, alpha=0, beta=c(1,-0.5),new.data=data.frame(z1=c(0,1, 0, 1), z2=c(0, 0, 1, 1))){ set.seed(seed) seeds <- unique(round(runif(sim*10,0,10)*sim,0))[seq(sim)] probit.00 <- matrix(NA, ncol=4, nrow=sim) probit.10 <- matrix(NA, ncol=4, nrow=sim) probit.01 <- matrix(NA, ncol=4, nrow=sim) probit.11 <- matrix(NA, ncol=4, nrow=sim) colnames(probit.00) <- c("est", "se", "lb", "ub") logit.00 <- matrix(NA, ncol=4, nrow=sim) logit.10 <- matrix(NA, ncol=4, nrow=sim) logit.01 <- matrix(NA, ncol=4, nrow=sim) logit.11 <- matrix(NA, ncol=4, nrow=sim) loglog.00 <- matrix(NA, ncol=4, nrow=sim) loglog.10 <- matrix(NA, ncol=4, nrow=sim) loglog.01 <- matrix(NA, ncol=4, nrow=sim) loglog.11 <- matrix(NA, ncol=4, nrow=sim) cloglog.00 <- matrix(NA, ncol=4, nrow=sim) cloglog.10 <- matrix(NA, ncol=4, nrow=sim) cloglog.01 <- matrix(NA, ncol=4, nrow=sim) cloglog.11 <- matrix(NA, ncol=4, nrow=sim) ols.00 <- matrix(NA, ncol=4, nrow=sim) ols.10 <- matrix(NA, ncol=4, nrow=sim) ols.01 <- matrix(NA, ncol=4, nrow=sim) ols.11 <- matrix(NA, ncol=4, nrow=sim) for(i in 1:sim){ try({ data <- generate.data.a(seed=seeds[i], n=n,p=p, alpha=alpha, beta=beta) orm.probit <- orm(y~z1+z2, data=data, family=probit, x=TRUE, y=TRUE) orm.logit <- orm(y~z1+z2, data=data, family=logistic, x=TRUE, y=TRUE) orm.loglog <- orm(y~z1+z2, data=data, family=loglog, x=TRUE, y=TRUE) orm.cloglog <- orm(y~z1+z2, data=data, family=cloglog, x=TRUE, y=TRUE) ols.mod <- ols(y~ z1+z2, data=data) mean.probit <- mean.orm(orm.probit, new.data, se=TRUE) mean.logit <- mean.orm(orm.logit, new.data, se=TRUE) mean.loglog <- mean.orm(orm.loglog, new.data, se=TRUE) mean.cloglog <- mean.orm(orm.cloglog, new.data, se=TRUE) mean.ols <- predict(ols.mod, new.data, se.fit=TRUE, conf.int=0.95) mean.ols <- cbind(mean.ols$linear.predictors, mean.ols$se.fit, mean.ols$lower, mean.ols$upper) probit.00[i,] <- mean.probit[1,] probit.10[i,] <- mean.probit[2,] probit.01[i,] <- mean.probit[3,] probit.11[i,] <- mean.probit[4,] logit.00[i,] <- mean.logit[1,] logit.10[i,] <- mean.logit[2,] logit.01[i,] <- mean.logit[3,] logit.11[i,] <- mean.logit[4,] loglog.00[i,] <- mean.loglog[1,] loglog.10[i,] <- mean.loglog[2,] loglog.01[i,] <- mean.loglog[3,] loglog.11[i,] <- mean.loglog[4,] cloglog.00[i,] <- mean.cloglog[1,] cloglog.10[i,] <- mean.cloglog[2,] cloglog.01[i,] <- mean.cloglog[3,] cloglog.11[i,] <- mean.cloglog[4,] ols.00[i,] <- mean.ols[1,] ols.10[i,] <- mean.ols[2,] ols.01[i,] <- mean.ols[3,] ols.11[i,] <- mean.ols[4,] }) } return(list(probit.00=probit.00, probit.10=probit.10, probit.01=probit.01, probit.11=probit.11, logit.00=logit.00, logit.10=logit.10, logit.01=logit.01, logit.11=logit.11, loglog.00=loglog.00, loglog.10=loglog.10, loglog.01=loglog.01, loglog.11=loglog.11, cloglog.00=cloglog.00, cloglog.10=cloglog.10, cloglog.01=cloglog.01, cloglog.11=cloglog.11, ols.00=ols.00, ols.10=ols.10, ols.01=ols.01, ols.11=ols.11 )) } sim3_mean.fun.b <- function(sim=100,seed=1, n=50, p=0.5, alpha=0, beta=c(1,-0.5),new.data=data.frame(z1=c(0,1, 0, 1), z2=c(0, 0, 1, 1))){ set.seed(seed) seeds <- unique(round(runif(sim*10,0,10)*sim,0))[seq(sim)] probit.00 <- matrix(NA, ncol=4, nrow=sim) probit.10 <- matrix(NA, ncol=4, nrow=sim) probit.01 <- matrix(NA, ncol=4, nrow=sim) probit.11 <- matrix(NA, ncol=4, nrow=sim) colnames(probit.00) <- c("est", "se", "lb", "ub") logit.00 <- matrix(NA, ncol=4, nrow=sim) logit.10 <- matrix(NA, ncol=4, nrow=sim) logit.01 <- matrix(NA, ncol=4, nrow=sim) logit.11 <- matrix(NA, ncol=4, nrow=sim) loglog.00 <- matrix(NA, ncol=4, nrow=sim) loglog.10 <- matrix(NA, ncol=4, nrow=sim) loglog.01 <- matrix(NA, ncol=4, nrow=sim) loglog.11 <- matrix(NA, ncol=4, nrow=sim) cloglog.00 <- matrix(NA, ncol=4, nrow=sim) cloglog.10 <- matrix(NA, ncol=4, nrow=sim) cloglog.01 <- matrix(NA, ncol=4, nrow=sim) cloglog.11 <- matrix(NA, ncol=4, nrow=sim) ols.00 <- matrix(NA, ncol=4, nrow=sim) ols.10 <- matrix(NA, ncol=4, nrow=sim) ols.01 <- matrix(NA, ncol=4, nrow=sim) ols.11 <- matrix(NA, ncol=4, nrow=sim) for(i in 1:sim){ try({ data <- generate.data.b(seed=seeds[i], n=n,p=p, alpha=alpha, beta=beta) orm.probit <- orm(y~z1+z2, data=data, family=probit, x=TRUE, y=TRUE) orm.logit <- orm(y~z1+z2, data=data, family=logistic, x=TRUE, y=TRUE) orm.loglog <- orm(y~z1+z2, data=data, family=loglog, x=TRUE, y=TRUE) orm.cloglog <- orm(y~z1+z2, data=data, family=cloglog, x=TRUE, y=TRUE) ols.mod <- ols(y~ z1+z2, data=data) mean.probit <- mean.orm(orm.probit, new.data, se=TRUE) mean.logit <- mean.orm(orm.logit, new.data, se=TRUE) mean.loglog <- mean.orm(orm.loglog, new.data, se=TRUE) mean.cloglog <- mean.orm(orm.cloglog, new.data, se=TRUE) mean.ols <- predict(ols.mod, new.data, se.fit=TRUE, conf.int=0.95) mean.ols <- cbind(mean.ols$linear.predictors, mean.ols$se.fit, mean.ols$lower, mean.ols$upper) probit.00[i,] <- mean.probit[1,] probit.10[i,] <- mean.probit[2,] probit.01[i,] <- mean.probit[3,] probit.11[i,] <- mean.probit[4,] logit.00[i,] <- mean.logit[1,] logit.10[i,] <- mean.logit[2,] logit.01[i,] <- mean.logit[3,] logit.11[i,] <- mean.logit[4,] loglog.00[i,] <- mean.loglog[1,] loglog.10[i,] <- mean.loglog[2,] loglog.01[i,] <- mean.loglog[3,] loglog.11[i,] <- mean.loglog[4,] cloglog.00[i,] <- mean.cloglog[1,] cloglog.10[i,] <- mean.cloglog[2,] cloglog.01[i,] <- mean.cloglog[3,] cloglog.11[i,] <- mean.cloglog[4,] ols.00[i,] <- mean.ols[1,] ols.10[i,] <- mean.ols[2,] ols.01[i,] <- mean.ols[3,] ols.11[i,] <- mean.ols[4,] }) } return(list(probit.00=probit.00, probit.10=probit.10, probit.01=probit.01, probit.11=probit.11, logit.00=logit.00, logit.10=logit.10, logit.01=logit.01, logit.11=logit.11, loglog.00=loglog.00, loglog.10=loglog.10, loglog.01=loglog.01, loglog.11=loglog.11, cloglog.00=cloglog.00, cloglog.10=cloglog.10, cloglog.01=cloglog.01, cloglog.11=cloglog.11, ols.00=ols.00, ols.10=ols.10, ols.01=ols.01, ols.11=ols.11 )) } sim3_mean.fun.c <- function(sim=100,seed=1, n=50, p=0.5, alpha=0, beta=c(1,-0.5),new.data=data.frame(z1=c(0,1, 0, 1), z2=c(0, 0, 1, 1))){ set.seed(seed) seeds <- unique(round(runif(sim*10,0,10)*sim,0))[seq(sim)] probit.00 <- matrix(NA, ncol=4, nrow=sim) probit.10 <- matrix(NA, ncol=4, nrow=sim) probit.01 <- matrix(NA, ncol=4, nrow=sim) probit.11 <- matrix(NA, ncol=4, nrow=sim) colnames(probit.00) <- c("est", "se", "lb", "ub") logit.00 <- matrix(NA, ncol=4, nrow=sim) logit.10 <- matrix(NA, ncol=4, nrow=sim) logit.01 <- matrix(NA, ncol=4, nrow=sim) logit.11 <- matrix(NA, ncol=4, nrow=sim) loglog.00 <- matrix(NA, ncol=4, nrow=sim) loglog.10 <- matrix(NA, ncol=4, nrow=sim) loglog.01 <- matrix(NA, ncol=4, nrow=sim) loglog.11 <- matrix(NA, ncol=4, nrow=sim) cloglog.00 <- matrix(NA, ncol=4, nrow=sim) cloglog.10 <- matrix(NA, ncol=4, nrow=sim) cloglog.01 <- matrix(NA, ncol=4, nrow=sim) cloglog.11 <- matrix(NA, ncol=4, nrow=sim) ols.00 <- matrix(NA, ncol=4, nrow=sim) ols.10 <- matrix(NA, ncol=4, nrow=sim) ols.01 <- matrix(NA, ncol=4, nrow=sim) ols.11 <- matrix(NA, ncol=4, nrow=sim) for(i in 1:sim){ try({ data <- generate.data.c(seed=seeds[i], n=n,p=p, alpha=alpha, beta=beta) orm.probit <- orm(y~z1+z2, data=data, family=probit, x=TRUE, y=TRUE) orm.logit <- orm(y~z1+z2, data=data, family=logistic, x=TRUE, y=TRUE) orm.loglog <- orm(y~z1+z2, data=data, family=loglog, x=TRUE, y=TRUE) orm.cloglog <- orm(y~z1+z2, data=data, family=cloglog, x=TRUE, y=TRUE) ols.mod <- ols(y~ z1+z2, data=data) mean.probit <- mean.orm(orm.probit, new.data, se=TRUE) mean.logit <- mean.orm(orm.logit, new.data, se=TRUE) mean.loglog <- mean.orm(orm.loglog, new.data, se=TRUE) mean.cloglog <- mean.orm(orm.cloglog, new.data, se=TRUE) mean.ols <- predict(ols.mod, new.data, se.fit=TRUE, conf.int=0.95) mean.ols <- cbind(mean.ols$linear.predictors, mean.ols$se.fit, mean.ols$lower, mean.ols$upper) probit.00[i,] <- mean.probit[1,] probit.10[i,] <- mean.probit[2,] probit.01[i,] <- mean.probit[3,] probit.11[i,] <- mean.probit[4,] logit.00[i,] <- mean.logit[1,] logit.10[i,] <- mean.logit[2,] logit.01[i,] <- mean.logit[3,] logit.11[i,] <- mean.logit[4,] loglog.00[i,] <- mean.loglog[1,] loglog.10[i,] <- mean.loglog[2,] loglog.01[i,] <- mean.loglog[3,] loglog.11[i,] <- mean.loglog[4,] cloglog.00[i,] <- mean.cloglog[1,] cloglog.10[i,] <- mean.cloglog[2,] cloglog.01[i,] <- mean.cloglog[3,] cloglog.11[i,] <- mean.cloglog[4,] ols.00[i,] <- mean.ols[1,] ols.10[i,] <- mean.ols[2,] ols.01[i,] <- mean.ols[3,] ols.11[i,] <- mean.ols[4,] }) } return(list(probit.00=probit.00, probit.10=probit.10, probit.01=probit.01, probit.11=probit.11, logit.00=logit.00, logit.10=logit.10, logit.01=logit.01, logit.11=logit.11, loglog.00=loglog.00, loglog.10=loglog.10, loglog.01=loglog.01, loglog.11=loglog.11, cloglog.00=cloglog.00, cloglog.10=cloglog.10, cloglog.01=cloglog.01, cloglog.11=cloglog.11, ols.00=ols.00, ols.10=ols.10, ols.01=ols.01, ols.11=ols.11 )) } sim3_mean.fun.d <- function(sim=100,seed=1, n=50, p=0.5, alpha=0, beta=c(1,-0.5),new.data=data.frame(z1=c(0,1, 0, 1), z2=c(0, 0, 1, 1))){ set.seed(seed) seeds <- unique(round(runif(sim*10,0,10)*sim,0))[seq(sim)] probit.00 <- matrix(NA, ncol=4, nrow=sim) probit.10 <- matrix(NA, ncol=4, nrow=sim) probit.01 <- matrix(NA, ncol=4, nrow=sim) probit.11 <- matrix(NA, ncol=4, nrow=sim) colnames(probit.00) <- c("est", "se", "lb", "ub") logit.00 <- matrix(NA, ncol=4, nrow=sim) logit.10 <- matrix(NA, ncol=4, nrow=sim) logit.01 <- matrix(NA, ncol=4, nrow=sim) logit.11 <- matrix(NA, ncol=4, nrow=sim) loglog.00 <- matrix(NA, ncol=4, nrow=sim) loglog.10 <- matrix(NA, ncol=4, nrow=sim) loglog.01 <- matrix(NA, ncol=4, nrow=sim) loglog.11 <- matrix(NA, ncol=4, nrow=sim) cloglog.00 <- matrix(NA, ncol=4, nrow=sim) cloglog.10 <- matrix(NA, ncol=4, nrow=sim) cloglog.01 <- matrix(NA, ncol=4, nrow=sim) cloglog.11 <- matrix(NA, ncol=4, nrow=sim) ols.00 <- matrix(NA, ncol=4, nrow=sim) ols.10 <- matrix(NA, ncol=4, nrow=sim) ols.01 <- matrix(NA, ncol=4, nrow=sim) ols.11 <- matrix(NA, ncol=4, nrow=sim) for(i in 1:sim){ try({ data <- generate.data.d(seed=seeds[i], n=n,p=p, alpha=alpha, beta=beta) orm.probit <- orm(y~z1+z2, data=data, family=probit, x=TRUE, y=TRUE) orm.logit <- orm(y~z1+z2, data=data, family=logistic, x=TRUE, y=TRUE) orm.loglog <- orm(y~z1+z2, data=data, family=loglog, x=TRUE, y=TRUE) orm.cloglog <- orm(y~z1+z2, data=data, family=cloglog, x=TRUE, y=TRUE) ols.mod <- ols(y~ z1+z2, data=data) mean.probit <- mean.orm(orm.probit, new.data, se=TRUE) mean.logit <- mean.orm(orm.logit, new.data, se=TRUE) mean.loglog <- mean.orm(orm.loglog, new.data, se=TRUE) mean.cloglog <- mean.orm(orm.cloglog, new.data, se=TRUE) mean.ols <- predict(ols.mod, new.data, se.fit=TRUE, conf.int=0.95) mean.ols <- cbind(mean.ols$linear.predictors, mean.ols$se.fit, mean.ols$lower, mean.ols$upper) probit.00[i,] <- mean.probit[1,] probit.10[i,] <- mean.probit[2,] probit.01[i,] <- mean.probit[3,] probit.11[i,] <- mean.probit[4,] logit.00[i,] <- mean.logit[1,] logit.10[i,] <- mean.logit[2,] logit.01[i,] <- mean.logit[3,] logit.11[i,] <- mean.logit[4,] loglog.00[i,] <- mean.loglog[1,] loglog.10[i,] <- mean.loglog[2,] loglog.01[i,] <- mean.loglog[3,] loglog.11[i,] <- mean.loglog[4,] cloglog.00[i,] <- mean.cloglog[1,] cloglog.10[i,] <- mean.cloglog[2,] cloglog.01[i,] <- mean.cloglog[3,] cloglog.11[i,] <- mean.cloglog[4,] ols.00[i,] <- mean.ols[1,] ols.10[i,] <- mean.ols[2,] ols.01[i,] <- mean.ols[3,] ols.11[i,] <- mean.ols[4,] }) } return(list(probit.00=probit.00, probit.10=probit.10, probit.01=probit.01, probit.11=probit.11, logit.00=logit.00, logit.10=logit.10, logit.01=logit.01, logit.11=logit.11, loglog.00=loglog.00, loglog.10=loglog.10, loglog.01=loglog.01, loglog.11=loglog.11, cloglog.00=cloglog.00, cloglog.10=cloglog.10, cloglog.01=cloglog.01, cloglog.11=cloglog.11, ols.00=ols.00, ols.10=ols.10, ols.01=ols.01, ols.11=ols.11 )) } sim3_mean.fun.e <- function(sim=100,seed=1, n=50, p=0.5, alpha=0, beta=c(1,-0.5),new.data=data.frame(z1=c(0,1, 0, 1), z2=c(0, 0, 1, 1))){ set.seed(seed) seeds <- unique(round(runif(sim*10,0,10)*sim,0))[seq(sim)] probit.00 <- matrix(NA, ncol=4, nrow=sim) probit.10 <- matrix(NA, ncol=4, nrow=sim) probit.01 <- matrix(NA, ncol=4, nrow=sim) probit.11 <- matrix(NA, ncol=4, nrow=sim) colnames(probit.00) <- c("est", "se", "lb", "ub") logit.00 <- matrix(NA, ncol=4, nrow=sim) logit.10 <- matrix(NA, ncol=4, nrow=sim) logit.01 <- matrix(NA, ncol=4, nrow=sim) logit.11 <- matrix(NA, ncol=4, nrow=sim) loglog.00 <- matrix(NA, ncol=4, nrow=sim) loglog.10 <- matrix(NA, ncol=4, nrow=sim) loglog.01 <- matrix(NA, ncol=4, nrow=sim) loglog.11 <- matrix(NA, ncol=4, nrow=sim) cloglog.00 <- matrix(NA, ncol=4, nrow=sim) cloglog.10 <- matrix(NA, ncol=4, nrow=sim) cloglog.01 <- matrix(NA, ncol=4, nrow=sim) cloglog.11 <- matrix(NA, ncol=4, nrow=sim) ols.00 <- matrix(NA, ncol=4, nrow=sim) ols.10 <- matrix(NA, ncol=4, nrow=sim) ols.01 <- matrix(NA, ncol=4, nrow=sim) ols.11 <- matrix(NA, ncol=4, nrow=sim) for(i in 1:sim){ try({ data <- generate.data.e(seed=seeds[i], n=n,p=p, alpha=alpha, beta=beta) orm.probit <- orm(y~z1+z2, data=data, family=probit, x=TRUE, y=TRUE) orm.logit <- orm(y~z1+z2, data=data, family=logistic, x=TRUE, y=TRUE) orm.loglog <- orm(y~z1+z2, data=data, family=loglog, x=TRUE, y=TRUE) orm.cloglog <- orm(y~z1+z2, data=data, family=cloglog, x=TRUE, y=TRUE) ols.mod <- ols(y~ z1+z2, data=data) mean.probit <- mean.orm(orm.probit, new.data, se=TRUE) mean.logit <- mean.orm(orm.logit, new.data, se=TRUE) mean.loglog <- mean.orm(orm.loglog, new.data, se=TRUE) mean.cloglog <- mean.orm(orm.cloglog, new.data, se=TRUE) mean.ols <- predict(ols.mod, new.data, se.fit=TRUE, conf.int=0.95) mean.ols <- cbind(mean.ols$linear.predictors, mean.ols$se.fit, mean.ols$lower, mean.ols$upper) probit.00[i,] <- mean.probit[1,] probit.10[i,] <- mean.probit[2,] probit.01[i,] <- mean.probit[3,] probit.11[i,] <- mean.probit[4,] logit.00[i,] <- mean.logit[1,] logit.10[i,] <- mean.logit[2,] logit.01[i,] <- mean.logit[3,] logit.11[i,] <- mean.logit[4,] loglog.00[i,] <- mean.loglog[1,] loglog.10[i,] <- mean.loglog[2,] loglog.01[i,] <- mean.loglog[3,] loglog.11[i,] <- mean.loglog[4,] cloglog.00[i,] <- mean.cloglog[1,] cloglog.10[i,] <- mean.cloglog[2,] cloglog.01[i,] <- mean.cloglog[3,] cloglog.11[i,] <- mean.cloglog[4,] ols.00[i,] <- mean.ols[1,] ols.10[i,] <- mean.ols[2,] ols.01[i,] <- mean.ols[3,] ols.11[i,] <- mean.ols[4,] }) } return(list(probit.00=probit.00, probit.10=probit.10, probit.01=probit.01, probit.11=probit.11, logit.00=logit.00, logit.10=logit.10, logit.01=logit.01, logit.11=logit.11, loglog.00=loglog.00, loglog.10=loglog.10, loglog.01=loglog.01, loglog.11=loglog.11, cloglog.00=cloglog.00, cloglog.10=cloglog.10, cloglog.01=cloglog.01, cloglog.11=cloglog.11, ols.00=ols.00, ols.10=ols.10, ols.01=ols.01, ols.11=ols.11 )) } sim3_mean.fun.f <- function(sim=100,seed=1, n=50, p=0.5, alpha=0, beta=c(1,-0.5),new.data=data.frame(z1=c(0,1, 0, 1), z2=c(0, 0, 1, 1))){ set.seed(seed) seeds <- unique(round(runif(sim*10,0,10)*sim,0))[seq(sim)] probit.00 <- matrix(NA, ncol=4, nrow=sim) probit.10 <- matrix(NA, ncol=4, nrow=sim) probit.01 <- matrix(NA, ncol=4, nrow=sim) probit.11 <- matrix(NA, ncol=4, nrow=sim) colnames(probit.00) <- c("est", "se", "lb", "ub") logit.00 <- matrix(NA, ncol=4, nrow=sim) logit.10 <- matrix(NA, ncol=4, nrow=sim) logit.01 <- matrix(NA, ncol=4, nrow=sim) logit.11 <- matrix(NA, ncol=4, nrow=sim) loglog.00 <- matrix(NA, ncol=4, nrow=sim) loglog.10 <- matrix(NA, ncol=4, nrow=sim) loglog.01 <- matrix(NA, ncol=4, nrow=sim) loglog.11 <- matrix(NA, ncol=4, nrow=sim) cloglog.00 <- matrix(NA, ncol=4, nrow=sim) cloglog.10 <- matrix(NA, ncol=4, nrow=sim) cloglog.01 <- matrix(NA, ncol=4, nrow=sim) cloglog.11 <- matrix(NA, ncol=4, nrow=sim) ols.00 <- matrix(NA, ncol=4, nrow=sim) ols.10 <- matrix(NA, ncol=4, nrow=sim) ols.01 <- matrix(NA, ncol=4, nrow=sim) ols.11 <- matrix(NA, ncol=4, nrow=sim) for(i in 1:sim){ try({ data <- generate.data.f(seed=seeds[i], n=n,p=p, alpha=alpha, beta=beta) orm.probit <- orm(y~z1+z2, data=data, family=probit, x=TRUE, y=TRUE) orm.logit <- orm(y~z1+z2, data=data, family=logistic, x=TRUE, y=TRUE) orm.loglog <- orm(y~z1+z2, data=data, family=loglog, x=TRUE, y=TRUE) orm.cloglog <- orm(y~z1+z2, data=data, family=cloglog, x=TRUE, y=TRUE) ols.mod <- ols(y~ z1+z2, data=data) mean.probit <- mean.orm(orm.probit, new.data, se=TRUE) mean.logit <- mean.orm(orm.logit, new.data, se=TRUE) mean.loglog <- mean.orm(orm.loglog, new.data, se=TRUE) mean.cloglog <- mean.orm(orm.cloglog, new.data, se=TRUE) mean.ols <- predict(ols.mod, new.data, se.fit=TRUE, conf.int=0.95) mean.ols <- cbind(mean.ols$linear.predictors, mean.ols$se.fit, mean.ols$lower, mean.ols$upper) probit.00[i,] <- mean.probit[1,] probit.10[i,] <- mean.probit[2,] probit.01[i,] <- mean.probit[3,] probit.11[i,] <- mean.probit[4,] logit.00[i,] <- mean.logit[1,] logit.10[i,] <- mean.logit[2,] logit.01[i,] <- mean.logit[3,] logit.11[i,] <- mean.logit[4,] loglog.00[i,] <- mean.loglog[1,] loglog.10[i,] <- mean.loglog[2,] loglog.01[i,] <- mean.loglog[3,] loglog.11[i,] <- mean.loglog[4,] cloglog.00[i,] <- mean.cloglog[1,] cloglog.10[i,] <- mean.cloglog[2,] cloglog.01[i,] <- mean.cloglog[3,] cloglog.11[i,] <- mean.cloglog[4,] ols.00[i,] <- mean.ols[1,] ols.10[i,] <- mean.ols[2,] ols.01[i,] <- mean.ols[3,] ols.11[i,] <- mean.ols[4,] }) } return(list(probit.00=probit.00, probit.10=probit.10, probit.01=probit.01, probit.11=probit.11, logit.00=logit.00, logit.10=logit.10, logit.01=logit.01, logit.11=logit.11, loglog.00=loglog.00, loglog.10=loglog.10, loglog.01=loglog.01, loglog.11=loglog.11, cloglog.00=cloglog.00, cloglog.10=cloglog.10, cloglog.01=cloglog.01, cloglog.11=cloglog.11, ols.00=ols.00, ols.10=ols.10, ols.01=ols.01, ols.11=ols.11 )) } sim3_mean.fun.g <- function(sim=100,seed=1, n=50, p=0.5, alpha=0, beta=c(1,-0.5),new.data=data.frame(z1=c(0,1, 0, 1), z2=c(0, 0, 1, 1))){ set.seed(seed) seeds <- unique(round(runif(sim*10,0,10)*sim,0))[seq(sim)] probit.00 <- matrix(NA, ncol=4, nrow=sim) probit.10 <- matrix(NA, ncol=4, nrow=sim) probit.01 <- matrix(NA, ncol=4, nrow=sim) probit.11 <- matrix(NA, ncol=4, nrow=sim) colnames(probit.00) <- c("est", "se", "lb", "ub") logit.00 <- matrix(NA, ncol=4, nrow=sim) logit.10 <- matrix(NA, ncol=4, nrow=sim) logit.01 <- matrix(NA, ncol=4, nrow=sim) logit.11 <- matrix(NA, ncol=4, nrow=sim) loglog.00 <- matrix(NA, ncol=4, nrow=sim) loglog.10 <- matrix(NA, ncol=4, nrow=sim) loglog.01 <- matrix(NA, ncol=4, nrow=sim) loglog.11 <- matrix(NA, ncol=4, nrow=sim) cloglog.00 <- matrix(NA, ncol=4, nrow=sim) cloglog.10 <- matrix(NA, ncol=4, nrow=sim) cloglog.01 <- matrix(NA, ncol=4, nrow=sim) cloglog.11 <- matrix(NA, ncol=4, nrow=sim) ols.00 <- matrix(NA, ncol=4, nrow=sim) ols.10 <- matrix(NA, ncol=4, nrow=sim) ols.01 <- matrix(NA, ncol=4, nrow=sim) ols.11 <- matrix(NA, ncol=4, nrow=sim) for(i in 1:sim){ try({ data <- generate.data.g(seed=seeds[i], n=n,p=p, alpha=alpha, beta=beta) orm.probit <- orm(y~z1+z2, data=data, family=probit, x=TRUE, y=TRUE) orm.logit <- orm(y~z1+z2, data=data, family=logistic, x=TRUE, y=TRUE) orm.loglog <- orm(y~z1+z2, data=data, family=loglog, x=TRUE, y=TRUE) orm.cloglog <- orm(y~z1+z2, data=data, family=cloglog, x=TRUE, y=TRUE) ols.mod <- ols(y~ z1+z2, data=data) mean.probit <- mean.orm(orm.probit, new.data, se=TRUE) mean.logit <- mean.orm(orm.logit, new.data, se=TRUE) mean.loglog <- mean.orm(orm.loglog, new.data, se=TRUE) mean.cloglog <- mean.orm(orm.cloglog, new.data, se=TRUE) mean.ols <- predict(ols.mod, new.data, se.fit=TRUE, conf.int=0.95) mean.ols <- cbind(mean.ols$linear.predictors, mean.ols$se.fit, mean.ols$lower, mean.ols$upper) probit.00[i,] <- mean.probit[1,] probit.10[i,] <- mean.probit[2,] probit.01[i,] <- mean.probit[3,] probit.11[i,] <- mean.probit[4,] logit.00[i,] <- mean.logit[1,] logit.10[i,] <- mean.logit[2,] logit.01[i,] <- mean.logit[3,] logit.11[i,] <- mean.logit[4,] loglog.00[i,] <- mean.loglog[1,] loglog.10[i,] <- mean.loglog[2,] loglog.01[i,] <- mean.loglog[3,] loglog.11[i,] <- mean.loglog[4,] cloglog.00[i,] <- mean.cloglog[1,] cloglog.10[i,] <- mean.cloglog[2,] cloglog.01[i,] <- mean.cloglog[3,] cloglog.11[i,] <- mean.cloglog[4,] ols.00[i,] <- mean.ols[1,] ols.10[i,] <- mean.ols[2,] ols.01[i,] <- mean.ols[3,] ols.11[i,] <- mean.ols[4,] }) } return(list(probit.00=probit.00, probit.10=probit.10, probit.01=probit.01, probit.11=probit.11, logit.00=logit.00, logit.10=logit.10, logit.01=logit.01, logit.11=logit.11, loglog.00=loglog.00, loglog.10=loglog.10, loglog.01=loglog.01, loglog.11=loglog.11, cloglog.00=cloglog.00, cloglog.10=cloglog.10, cloglog.01=cloglog.01, cloglog.11=cloglog.11, ols.00=ols.00, ols.10=ols.10, ols.01=ols.01, ols.11=ols.11 )) } sim3_mean.fun.h <- function(sim=100,seed=1, n=50, p=0.5, alpha=0, beta=c(1,-0.5),new.data=data.frame(z1=c(0,1, 0, 1), z2=c(0, 0, 1, 1))){ set.seed(seed) seeds <- unique(round(runif(sim*10,0,10)*sim,0))[seq(sim)] probit.00 <- matrix(NA, ncol=4, nrow=sim) probit.10 <- matrix(NA, ncol=4, nrow=sim) probit.01 <- matrix(NA, ncol=4, nrow=sim) probit.11 <- matrix(NA, ncol=4, nrow=sim) colnames(probit.00) <- c("est", "se", "lb", "ub") logit.00 <- matrix(NA, ncol=4, nrow=sim) logit.10 <- matrix(NA, ncol=4, nrow=sim) logit.01 <- matrix(NA, ncol=4, nrow=sim) logit.11 <- matrix(NA, ncol=4, nrow=sim) loglog.00 <- matrix(NA, ncol=4, nrow=sim) loglog.10 <- matrix(NA, ncol=4, nrow=sim) loglog.01 <- matrix(NA, ncol=4, nrow=sim) loglog.11 <- matrix(NA, ncol=4, nrow=sim) cloglog.00 <- matrix(NA, ncol=4, nrow=sim) cloglog.10 <- matrix(NA, ncol=4, nrow=sim) cloglog.01 <- matrix(NA, ncol=4, nrow=sim) cloglog.11 <- matrix(NA, ncol=4, nrow=sim) ols.00 <- matrix(NA, ncol=4, nrow=sim) ols.10 <- matrix(NA, ncol=4, nrow=sim) ols.01 <- matrix(NA, ncol=4, nrow=sim) ols.11 <- matrix(NA, ncol=4, nrow=sim) for(i in 1:sim){ try({ data <- generate.data.h(seed=seeds[i], n=n,p=p, alpha=alpha, beta=beta) orm.probit <- orm(y~z1+z2, data=data, family=probit, x=TRUE, y=TRUE) orm.logit <- orm(y~z1+z2, data=data, family=logistic, x=TRUE, y=TRUE) orm.loglog <- orm(y~z1+z2, data=data, family=loglog, x=TRUE, y=TRUE) orm.cloglog <- orm(y~z1+z2, data=data, family=cloglog, x=TRUE, y=TRUE) ols.mod <- ols(y~ z1+z2, data=data) mean.probit <- mean.orm(orm.probit, new.data, se=TRUE) mean.logit <- mean.orm(orm.logit, new.data, se=TRUE) mean.loglog <- mean.orm(orm.loglog, new.data, se=TRUE) mean.cloglog <- mean.orm(orm.cloglog, new.data, se=TRUE) mean.ols <- predict(ols.mod, new.data, se.fit=TRUE, conf.int=0.95) mean.ols <- cbind(mean.ols$linear.predictors, mean.ols$se.fit, mean.ols$lower, mean.ols$upper) probit.00[i,] <- mean.probit[1,] probit.10[i,] <- mean.probit[2,] probit.01[i,] <- mean.probit[3,] probit.11[i,] <- mean.probit[4,] logit.00[i,] <- mean.logit[1,] logit.10[i,] <- mean.logit[2,] logit.01[i,] <- mean.logit[3,] logit.11[i,] <- mean.logit[4,] loglog.00[i,] <- mean.loglog[1,] loglog.10[i,] <- mean.loglog[2,] loglog.01[i,] <- mean.loglog[3,] loglog.11[i,] <- mean.loglog[4,] cloglog.00[i,] <- mean.cloglog[1,] cloglog.10[i,] <- mean.cloglog[2,] cloglog.01[i,] <- mean.cloglog[3,] cloglog.11[i,] <- mean.cloglog[4,] ols.00[i,] <- mean.ols[1,] ols.10[i,] <- mean.ols[2,] ols.01[i,] <- mean.ols[3,] ols.11[i,] <- mean.ols[4,] }) } return(list(probit.00=probit.00, probit.10=probit.10, probit.01=probit.01, probit.11=probit.11, logit.00=logit.00, logit.10=logit.10, logit.01=logit.01, logit.11=logit.11, loglog.00=loglog.00, loglog.10=loglog.10, loglog.01=loglog.01, loglog.11=loglog.11, cloglog.00=cloglog.00, cloglog.10=cloglog.10, cloglog.01=cloglog.01, cloglog.11=cloglog.11, ols.00=ols.00, ols.10=ols.10, ols.01=ols.01, ols.11=ols.11 )) } seed=1 p <- 0.5 alpha <- 0 beta <- c(1, -0.5) sigma <- 1 sim=10^4 sim3_mean.a.n50 <-sim3_mean.fun.a(sim=sim, n=50) sim3_mean.b.n50 <-sim3_mean.fun.b(sim=sim, n=50) sim3_mean.c.n50 <-sim3_mean.fun.c(sim=sim, n=50) sim3_mean.d.n50 <-sim3_mean.fun.d(sim=sim, n=50) sim3_mean.e.n50 <-sim3_mean.fun.e(sim=sim, n=50) sim3_mean.f.n50 <-sim3_mean.fun.f(sim=sim, n=50) sim3_mean.g.n50 <-sim3_mean.fun.g(sim=sim, n=50) sim3_mean.h.n50 <-sim3_mean.fun.h(sim=sim, n=50) sim3_mean.a.n100 <-sim3_mean.fun.a(sim=sim, n=50) sim3_mean.b.n100 <-sim3_mean.fun.b(sim=sim, n=50) sim3_mean.c.n100 <-sim3_mean.fun.c(sim=sim, n=50) sim3_mean.d.n100 <-sim3_mean.fun.d(sim=sim, n=50) sim3_mean.e.n100 <-sim3_mean.fun.e(sim=sim, n=50) sim3_mean.f.n100 <-sim3_mean.fun.f(sim=sim, n=50) sim3_mean.g.n100 <-sim3_mean.fun.g(sim=sim, n=50) sim3_mean.h.n100 <-sim3_mean.fun.h(sim=sim, n=50) sim3_mean.a.n50 <-sim3_mean.fun.a(sim=sim, n=100) sim3_mean.b.n50 <-sim3_mean.fun.b(sim=sim, n=100) sim3_mean.c.n50 <-sim3_mean.fun.c(sim=sim, n=100) sim3_mean.d.n50 <-sim3_mean.fun.d(sim=sim, n=100) sim3_mean.e.n50 <-sim3_mean.fun.e(sim=sim, n=100) sim3_mean.f.n50 <-sim3_mean.fun.f(sim=sim, n=100) sim3_mean.g.n50 <-sim3_mean.fun.g(sim=sim, n=100) sim3_mean.h.n50 <-sim3_mean.fun.h(sim=sim, n=100) sim3_mean.a.n200 <-sim3_mean.fun.a(sim=sim, n=200) sim3_mean.b.n200 <-sim3_mean.fun.b(sim=sim, n=200) sim3_mean.c.n200 <-sim3_mean.fun.c(sim=sim, n=200) sim3_mean.d.n200 <-sim3_mean.fun.d(sim=sim, n=200) sim3_mean.e.n200 <-sim3_mean.fun.e(sim=sim, n=200) sim3_mean.f.n200 <-sim3_mean.fun.f(sim=sim, n=200) sim3_mean.g.n200 <-sim3_mean.fun.g(sim=sim, n=200) sim3_mean.h.n200 <-sim3_mean.fun.h(sim=sim, n=200) new.data=data.frame(z1=c(0,1, 0, 1), z2=c(0, 0, 1, 1)) beta <- c(1, -0.5) true=as.matrix(new.data)%*%beta sim3_mean.summary <- function(true=as.matrix(new.data)%*%beta, result){ result.00.est <- c(mean(result$probit.00[,1], na.rm=TRUE), mean(result$logit.00[,1], na.rm=TRUE), mean(result$loglog.00[,1], na.rm=TRUE), mean(result$cloglog.00[,1], na.rm=TRUE)) result.00.coverage <- c(mean(result$probit.00[,3]<=true[1,1]&result$probit.00[,4]>=true[1,1], na.rm=TRUE), mean(result$logit.00[,3]<=true[1,1]&result$logit.00[,4]>=true[1,1], na.rm=TRUE), mean(result$loglog.00[,3]<=true[1,1]&result$loglog.00[,4]>=true[1,1], na.rm=TRUE), mean(result$cloglog.00[,3]<=true[1,1]&result$cloglog.00[,4]>=true[1,1], na.rm=TRUE)) result.00.mse <- c(mean((result$probit.00[,1]-true[1,1])^2, na.rm=TRUE), mean( (result$logit.00[,1]-true[1,1])^2, na.rm=TRUE), mean((result$loglog.00[,1]-true[1,1])^2, na.rm=TRUE), mean((result$cloglog.00[,1]-true[1,1])^2, na.rm=TRUE), mean((result$ols.00[,1]-true[1,1])^2, na.rm=TRUE) ) result.00.re <- (result.00.mse[5]/result.00.mse)[-5] result.00 <- cbind(format(round(result.00.est,2), nsmall=2), format(round(result.00.coverage, 3), nsmall=3), format(round(result.00.re,3), nsmall=3)) result.00.out <- c(format(round(true[1,1],2),nsmall=0), result.00[1,], result.00[2,], result.00[3,], result.00[4,]) ######## 10 result.10.est <- c(mean(result$probit.10[,1], na.rm=TRUE), mean(result$logit.10[,1], na.rm=TRUE), mean(result$loglog.10[,1], na.rm=TRUE), mean(result$cloglog.10[,1], na.rm=TRUE)) result.10.coverage <- c(mean(result$probit.10[,3]<=true[2,1]&result$probit.10[,4]>=true[2,1], na.rm=TRUE), mean(result$logit.10[,3]<=true[2,1]&result$logit.10[,4]>=true[2,1], na.rm=TRUE), mean(result$loglog.10[,3]<=true[2,1]&result$loglog.10[,4]>=true[2,1], na.rm=TRUE), mean(result$cloglog.10[,3]<=true[2,1]&result$cloglog.10[,4]>=true[2,1], na.rm=TRUE)) result.10.mse <- c(mean((result$probit.10[,1]-true[2,1])^2, na.rm=TRUE), mean( (result$logit.10[,1]-true[2,1])^2, na.rm=TRUE), mean((result$loglog.10[,1]-true[2,1])^2, na.rm=TRUE), mean((result$cloglog.10[,1]-true[2,1])^2, na.rm=TRUE), mean((result$ols.10[,1]-true[2,1])^2, na.rm=TRUE) ) result.10.re <- (result.10.mse[5]/result.10.mse)[-5] result.10 <- cbind(format(round(result.10.est,2), nsmall=2), format(round(result.10.coverage, 3), nsmall=3), format(round(result.10.re,3), nsmall=3)) result.10.out <- c(format(round(true[2,1],2),nsmall=0), result.10[1,], result.10[2,], result.10[3,], result.10[4,]) ###### 01 result.01.est <- c(mean(result$probit.01[,1], na.rm=TRUE), mean(result$logit.01[,1], na.rm=TRUE), mean(result$loglog.01[,1], na.rm=TRUE), mean(result$cloglog.01[,1], na.rm=TRUE)) result.01.coverage <- c(mean(result$probit.01[,3]<=true[3,1]&result$probit.01[,4]>=true[3,1], na.rm=TRUE), mean(result$logit.01[,3]<=true[3,1]&result$logit.01[,4]>=true[3,1], na.rm=TRUE), mean(result$loglog.01[,3]<=true[3,1]&result$loglog.01[,4]>=true[3,1], na.rm=TRUE), mean(result$cloglog.01[,3]<=true[3,1]&result$cloglog.01[,4]>=true[3,1], na.rm=TRUE)) result.01.mse <- c(mean((result$probit.01[,1]-true[3,1])^2, na.rm=TRUE), mean( (result$logit.01[,1]-true[3,1])^2, na.rm=TRUE), mean((result$loglog.01[,1]-true[3,1])^2, na.rm=TRUE), mean((result$cloglog.01[,1]-true[3,1])^2, na.rm=TRUE), mean((result$ols.01[,1]-true[3,1])^2, na.rm=TRUE) ) result.01.re <- (result.01.mse[5]/result.01.mse)[-5] result.01 <- cbind(format(round(result.01.est,2), nsmall=2), format(round(result.01.coverage, 3), nsmall=3), format(round(result.01.re,3), nsmall=3)) result.01.out <- c(format(round(true[3,1],2),nsmall=0), result.01[1,], result.01[2,], result.01[3,], result.01[4,]) ###### 11 result.11.est <- c(mean(result$probit.11[,1], na.rm=TRUE), mean(result$logit.11[,1], na.rm=TRUE), mean(result$loglog.11[,1], na.rm=TRUE), mean(result$cloglog.11[,1], na.rm=TRUE)) result.11.coverage <- c(mean(result$probit.11[,3]<=true[4,1]&result$probit.11[,4]>=true[4,1], na.rm=TRUE), mean(result$logit.11[,3]<=true[4,1]&result$logit.11[,4]>=true[4,1], na.rm=TRUE), mean(result$loglog.11[,3]<=true[4,1]&result$loglog.11[,4]>=true[4,1], na.rm=TRUE), mean(result$cloglog.11[,3]<=true[4,1]&result$cloglog.11[,4]>=true[4,1], na.rm=TRUE)) result.11.mse <- c(mean((result$probit.11[,1]-true[4,1])^2, na.rm=TRUE), mean( (result$logit.11[,1]-true[4,1])^2, na.rm=TRUE), mean((result$loglog.11[,1]-true[4,1])^2, na.rm=TRUE), mean((result$cloglog.11[,1]-true[4,1])^2, na.rm=TRUE), mean((result$ols.11[,1]-true[4,1])^2, na.rm=TRUE) ) result.11.re <- (result.11.mse[5]/result.11.mse)[-5] result.11 <- cbind(format(round(result.11.est,2), nsmall=2), format(round(result.11.coverage, 3), nsmall=3), format(round(result.11.re,3), nsmall=3)) result.11.out <- c(format(round(true[4,1],2),nsmall=0), result.11[1,], result.11[2,], result.11[3,], result.11[4,]) result.out <- rbind(result.00.out, result.10.out, result.01.out, result.11.out) return(result.out) } beta=c(1, -0.5) new.data=as.matrix(data.frame(z1=c(0, 1, 0, 1), z2=c(0, 0, 1, 1))) result.sim3_mean.a.n100 <- sim3_mean.summary(true=as.matrix(new.data)%*%beta, result=sim3_mean.a.n100) result.sim3_mean.b.n100 <- sim3_mean.summary(true=as.matrix(new.data)%*%beta, result=sim3_mean.b.n100) result.sim3_mean.c.n100 <- sim3_mean.summary(true=digamma(1)+as.matrix(new.data)%*%beta, result=sim3_mean.c.n100) result.sim3_mean.d.n100 <- sim3_mean.summary(true=-digamma(1)+as.matrix(new.data)%*%beta, result=sim3_mean.d.n100) result.sim3_mean.e.n100 <- sim3_mean.summary(true=as.matrix(new.data)%*%beta, result=sim3_mean.e.n100) result.sim3_mean.f.n100 <- sim3_mean.summary(true=as.matrix(new.data)%*%beta, result=sim3_mean.f.n100) result.sim3_mean.g.n100 <- sim3_mean.summary(true=as.matrix(new.data)%*%beta, result=sim3_mean.g.n100) result.sim3_mean.h.n100 <- sim3_mean.summary(true=as.matrix(new.data)%*%beta, result=sim3_mean.h.n100) ###### Figure 9, Tables S.10 and S.11 sim3_mean.out.n100 <- rbind(rep("", 13), result.sim3_mean.a.n100, rep("", 13), rep("", 13), result.sim3_mean.b.n100, rep("", 13), rep("", 13), result.sim3_mean.c.n100, rep("", 13), rep("", 13), result.sim3_mean.d.n100, rep("", 13), rep("", 13), result.sim3_mean.e.n100, rep("", 13), rep("", 13), result.sim3_mean.f.n100, rep("", 13), rep("", 13), result.sim3_mean.g.n100, rep("", 13), rep("", 13), result.sim3_mean.h.n100) ###### Tables S.6 and S.7 #################### result.sim3_mean.a.n50 <- sim3_mean.summary(true=as.matrix(new.data)%*%beta, result=sim3_mean.a.n50) result.sim3_mean.b.n50 <- sim3_mean.summary(true=as.matrix(new.data)%*%beta, result=sim3_mean.b.n50) result.sim3_mean.c.n50 <- sim3_mean.summary(true=digamma(1)+as.matrix(new.data)%*%beta, result=sim3_mean.c.n50) result.sim3_mean.d.n50 <- sim3_mean.summary(true=-digamma(1)+as.matrix(new.data)%*%beta, result=sim3_mean.d.n50) result.sim3_mean.e.n50 <- sim3_mean.summary(true=as.matrix(new.data)%*%beta, result=sim3_mean.e.n50) result.sim3_mean.f.n50 <- sim3_mean.summary(true=as.matrix(new.data)%*%beta, result=sim3_mean.f.n50) result.sim3_mean.g.n50 <- sim3_mean.summary(true=as.matrix(new.data)%*%beta, result=sim3_mean.g.n50) result.sim3_mean.h.n50 <- sim3_mean.summary(true=as.matrix(new.data)%*%beta, result=sim3_mean.h.n50) sim3_mean.out.n50 <- rbind(rep("", 13), result.sim3_mean.a.n50, rep("", 13), rep("", 13), result.sim3_mean.b.n50, rep("", 13), rep("", 13), result.sim3_mean.c.n50, rep("", 13), rep("", 13), result.sim3_mean.d.n50, rep("", 13), rep("", 13), result.sim3_mean.e.n50, rep("", 13), rep("", 13), result.sim3_mean.f.n50, rep("", 13), rep("", 13), result.sim3_mean.g.n50, rep("", 13), rep("", 13), result.sim3_mean.h.n50) ############# Tables S.14 and S.15 ################ result.sim3_mean.a.n200 <- sim3_mean.summary(true=as.matrix(new.data)%*%beta, result=sim3_mean.a.n200) result.sim3_mean.b.n200 <- sim3_mean.summary(true=as.matrix(new.data)%*%beta, result=sim3_mean.b.n200) result.sim3_mean.c.n200 <- sim3_mean.summary(true=digamma(1)+as.matrix(new.data)%*%beta, result=sim3_mean.c.n200) result.sim3_mean.d.n200 <- sim3_mean.summary(true=-digamma(1)+as.matrix(new.data)%*%beta, result=sim3_mean.d.n200) result.sim3_mean.e.n200 <- sim3_mean.summary(true=as.matrix(new.data)%*%beta, result=sim3_mean.e.n200) result.sim3_mean.f.n200 <- sim3_mean.summary(true=as.matrix(new.data)%*%beta, result=sim3_mean.f.n200) result.sim3_mean.g.n200 <- sim3_mean.summary(true=as.matrix(new.data)%*%beta, result=sim3_mean.g.n200) result.sim3_mean.h.n200 <- sim3_mean.summary(true=as.matrix(new.data)%*%beta, result=sim3_mean.h.n200) sim3_mean.out.n200 <- rbind(rep("", 13), result.sim3_mean.a.n200, rep("", 13), rep("", 13), result.sim3_mean.b.n200, rep("", 13), rep("", 13), result.sim3_mean.c.n200, rep("", 13), rep("", 13), result.sim3_mean.d.n200, rep("", 13), rep("", 13), result.sim3_mean.e.n200, rep("", 13), rep("", 13), result.sim3_mean.f.n200, rep("", 13), rep("", 13), result.sim3_mean.g.n200, rep("", 13), rep("", 13), result.sim3_mean.h.n200) ######### Sim 2: medians ####### library(quantreg) sim4_median.fun.a <- function(sim=100,seed=1, n=50, p=0.5, alpha=0, beta=c(1,-0.5),new.data=data.frame(z1=c(0,1, 0, 1), z2=c(0, 0, 1, 1))){ set.seed(seed) seeds <- unique(round(runif(sim*10,0,10)*sim,0))[seq(sim)] probit.00 <- matrix(NA, ncol=3, nrow=sim) probit.10 <- matrix(NA, ncol=3, nrow=sim) probit.01 <- matrix(NA, ncol=3, nrow=sim) probit.11 <- matrix(NA, ncol=3, nrow=sim) colnames(probit.00) <- c("est", "lb", "ub") logit.00 <- matrix(NA, ncol=3, nrow=sim) logit.10 <- matrix(NA, ncol=3, nrow=sim) logit.01 <- matrix(NA, ncol=3, nrow=sim) logit.11 <- matrix(NA, ncol=3, nrow=sim) loglog.00 <- matrix(NA, ncol=3, nrow=sim) loglog.10 <- matrix(NA, ncol=3, nrow=sim) loglog.01 <- matrix(NA, ncol=3, nrow=sim) loglog.11 <- matrix(NA, ncol=3, nrow=sim) cloglog.00 <- matrix(NA, ncol=3, nrow=sim) cloglog.10 <- matrix(NA, ncol=3, nrow=sim) cloglog.01 <- matrix(NA, ncol=3, nrow=sim) cloglog.11 <- matrix(NA, ncol=3, nrow=sim) rq.00 <- matrix(NA, ncol=3, nrow=sim) rq.10 <- matrix(NA, ncol=3, nrow=sim) rq.01 <- matrix(NA, ncol=3, nrow=sim) rq.11 <- matrix(NA, ncol=3, nrow=sim) for(i in 1:sim){ try({ data <- generate.data.a(seed=seeds[i], n=n,p=p, alpha=alpha, beta=beta) orm.probit <- orm(y~z1+z2, data=data, family=probit, x=TRUE, y=TRUE) orm.logit <- orm(y~z1+z2, data=data, family=logistic, x=TRUE, y=TRUE) orm.loglog <- orm(y~z1+z2, data=data, family=loglog, x=TRUE, y=TRUE) orm.cloglog <- orm(y~z1+z2, data=data, family=cloglog, x=TRUE, y=TRUE) rq.mod <- rq(y~ z1+z2, data=data) quantile.probit <- quantile.orm(orm.probit, new.data,probs=0.5, se=TRUE) quantile.probit <- cbind(quantile.probit$quantile, quantile.probit$lb, quantile.probit$ub) quantile.logit <- quantile.orm(orm.logit, new.data, probs=0.5, se=TRUE) quantile.logit <- cbind(quantile.logit$quantile, quantile.logit$lb, quantile.logit$ub) quantile.loglog <- quantile.orm(orm.loglog, new.data,probs=0.5, se=TRUE) quantile.loglog <- cbind(quantile.loglog$quantile, quantile.loglog$lb, quantile.loglog$ub) quantile.cloglog <- quantile.orm(orm.cloglog, new.data, probs=0.5,se=TRUE) quantile.cloglog <- cbind(quantile.cloglog$quantile, quantile.cloglog$lb, quantile.cloglog$ub) quantile.rq <- predict(rq.mod, new.data, interval = "confidence", conf.int=0.95) probit.00[i,] <- quantile.probit[1,] probit.10[i,] <- quantile.probit[2,] probit.01[i,] <- quantile.probit[3,] probit.11[i,] <- quantile.probit[4,] logit.00[i,] <- quantile.logit[1,] logit.10[i,] <- quantile.logit[2,] logit.01[i,] <- quantile.logit[3,] logit.11[i,] <- quantile.logit[4,] loglog.00[i,] <- quantile.loglog[1,] loglog.10[i,] <- quantile.loglog[2,] loglog.01[i,] <- quantile.loglog[3,] loglog.11[i,] <- quantile.loglog[4,] cloglog.00[i,] <- quantile.cloglog[1,] cloglog.10[i,] <- quantile.cloglog[2,] cloglog.01[i,] <- quantile.cloglog[3,] cloglog.11[i,] <- quantile.cloglog[4,] rq.00[i,] <- quantile.rq[1,] rq.10[i,] <- quantile.rq[2,] rq.01[i,] <- quantile.rq[3,] rq.11[i,] <- quantile.rq[4,] }) } return(list(probit.00=probit.00, probit.10=probit.10, probit.01=probit.01, probit.11=probit.11, logit.00=logit.00, logit.10=logit.10, logit.01=logit.01, logit.11=logit.11, loglog.00=loglog.00, loglog.10=loglog.10, loglog.01=loglog.01, loglog.11=loglog.11, cloglog.00=cloglog.00, cloglog.10=cloglog.10, cloglog.01=cloglog.01, cloglog.11=cloglog.11, rq.00=rq.00, rq.10=rq.10, rq.01=rq.01, rq.11=rq.11 )) } sim4_median.fun.b <- function(sim=100,seed=1, n=50, p=0.5, alpha=0, beta=c(1,-0.5),new.data=data.frame(z1=c(0,1, 0, 1), z2=c(0, 0, 1, 1))){ set.seed(seed) seeds <- unique(round(runif(sim*10,0,10)*sim,0))[seq(sim)] probit.00 <- matrix(NA, ncol=3, nrow=sim) probit.10 <- matrix(NA, ncol=3, nrow=sim) probit.01 <- matrix(NA, ncol=3, nrow=sim) probit.11 <- matrix(NA, ncol=3, nrow=sim) colnames(probit.00) <- c("est", "lb", "ub") logit.00 <- matrix(NA, ncol=3, nrow=sim) logit.10 <- matrix(NA, ncol=3, nrow=sim) logit.01 <- matrix(NA, ncol=3, nrow=sim) logit.11 <- matrix(NA, ncol=3, nrow=sim) loglog.00 <- matrix(NA, ncol=3, nrow=sim) loglog.10 <- matrix(NA, ncol=3, nrow=sim) loglog.01 <- matrix(NA, ncol=3, nrow=sim) loglog.11 <- matrix(NA, ncol=3, nrow=sim) cloglog.00 <- matrix(NA, ncol=3, nrow=sim) cloglog.10 <- matrix(NA, ncol=3, nrow=sim) cloglog.01 <- matrix(NA, ncol=3, nrow=sim) cloglog.11 <- matrix(NA, ncol=3, nrow=sim) rq.00 <- matrix(NA, ncol=3, nrow=sim) rq.10 <- matrix(NA, ncol=3, nrow=sim) rq.01 <- matrix(NA, ncol=3, nrow=sim) rq.11 <- matrix(NA, ncol=3, nrow=sim) for(i in 1:sim){ try({ data <- generate.data.b(seed=seeds[i], n=n,p=p, alpha=alpha, beta=beta) orm.probit <- orm(y~z1+z2, data=data, family=probit, x=TRUE, y=TRUE) orm.logit <- orm(y~z1+z2, data=data, family=logistic, x=TRUE, y=TRUE) orm.loglog <- orm(y~z1+z2, data=data, family=loglog, x=TRUE, y=TRUE) orm.cloglog <- orm(y~z1+z2, data=data, family=cloglog, x=TRUE, y=TRUE) rq.mod <- rq(y~ z1+z2, data=data) quantile.probit <- quantile.orm(orm.probit, new.data,probs=0.5, se=TRUE) quantile.probit <- cbind(quantile.probit$quantile, quantile.probit$lb, quantile.probit$ub) quantile.logit <- quantile.orm(orm.logit, new.data, probs=0.5, se=TRUE) quantile.logit <- cbind(quantile.logit$quantile, quantile.logit$lb, quantile.logit$ub) quantile.loglog <- quantile.orm(orm.loglog, new.data,probs=0.5, se=TRUE) quantile.loglog <- cbind(quantile.loglog$quantile, quantile.loglog$lb, quantile.loglog$ub) quantile.cloglog <- quantile.orm(orm.cloglog, new.data, probs=0.5,se=TRUE) quantile.cloglog <- cbind(quantile.cloglog$quantile, quantile.cloglog$lb, quantile.cloglog$ub) quantile.rq <- predict(rq.mod, new.data, interval = "confidence", conf.int=0.95) probit.00[i,] <- quantile.probit[1,] probit.10[i,] <- quantile.probit[2,] probit.01[i,] <- quantile.probit[3,] probit.11[i,] <- quantile.probit[4,] logit.00[i,] <- quantile.logit[1,] logit.10[i,] <- quantile.logit[2,] logit.01[i,] <- quantile.logit[3,] logit.11[i,] <- quantile.logit[4,] loglog.00[i,] <- quantile.loglog[1,] loglog.10[i,] <- quantile.loglog[2,] loglog.01[i,] <- quantile.loglog[3,] loglog.11[i,] <- quantile.loglog[4,] cloglog.00[i,] <- quantile.cloglog[1,] cloglog.10[i,] <- quantile.cloglog[2,] cloglog.01[i,] <- quantile.cloglog[3,] cloglog.11[i,] <- quantile.cloglog[4,] rq.00[i,] <- quantile.rq[1,] rq.10[i,] <- quantile.rq[2,] rq.01[i,] <- quantile.rq[3,] rq.11[i,] <- quantile.rq[4,] }) } return(list(probit.00=probit.00, probit.10=probit.10, probit.01=probit.01, probit.11=probit.11, logit.00=logit.00, logit.10=logit.10, logit.01=logit.01, logit.11=logit.11, loglog.00=loglog.00, loglog.10=loglog.10, loglog.01=loglog.01, loglog.11=loglog.11, cloglog.00=cloglog.00, cloglog.10=cloglog.10, cloglog.01=cloglog.01, cloglog.11=cloglog.11, rq.00=rq.00, rq.10=rq.10, rq.01=rq.01, rq.11=rq.11 )) } sim4_median.fun.c <- function(sim=100,seed=1, n=50, p=0.5, alpha=0, beta=c(1,-0.5),new.data=data.frame(z1=c(0,1, 0, 1), z2=c(0, 0, 1, 1))){ set.seed(seed) seeds <- unique(round(runif(sim*10,0,10)*sim,0))[seq(sim)] probit.00 <- matrix(NA, ncol=3, nrow=sim) probit.10 <- matrix(NA, ncol=3, nrow=sim) probit.01 <- matrix(NA, ncol=3, nrow=sim) probit.11 <- matrix(NA, ncol=3, nrow=sim) colnames(probit.00) <- c("est", "lb", "ub") logit.00 <- matrix(NA, ncol=3, nrow=sim) logit.10 <- matrix(NA, ncol=3, nrow=sim) logit.01 <- matrix(NA, ncol=3, nrow=sim) logit.11 <- matrix(NA, ncol=3, nrow=sim) loglog.00 <- matrix(NA, ncol=3, nrow=sim) loglog.10 <- matrix(NA, ncol=3, nrow=sim) loglog.01 <- matrix(NA, ncol=3, nrow=sim) loglog.11 <- matrix(NA, ncol=3, nrow=sim) cloglog.00 <- matrix(NA, ncol=3, nrow=sim) cloglog.10 <- matrix(NA, ncol=3, nrow=sim) cloglog.01 <- matrix(NA, ncol=3, nrow=sim) cloglog.11 <- matrix(NA, ncol=3, nrow=sim) rq.00 <- matrix(NA, ncol=3, nrow=sim) rq.10 <- matrix(NA, ncol=3, nrow=sim) rq.01 <- matrix(NA, ncol=3, nrow=sim) rq.11 <- matrix(NA, ncol=3, nrow=sim) for(i in 1:sim){ try({ data <- generate.data.c(seed=seeds[i], n=n,p=p, alpha=alpha, beta=beta) orm.probit <- orm(y~z1+z2, data=data, family=probit, x=TRUE, y=TRUE) orm.logit <- orm(y~z1+z2, data=data, family=logistic, x=TRUE, y=TRUE) orm.loglog <- orm(y~z1+z2, data=data, family=loglog, x=TRUE, y=TRUE) orm.cloglog <- orm(y~z1+z2, data=data, family=cloglog, x=TRUE, y=TRUE) rq.mod <- rq(y~ z1+z2, data=data) quantile.probit <- quantile.orm(orm.probit, new.data,probs=0.5, se=TRUE) quantile.probit <- cbind(quantile.probit$quantile, quantile.probit$lb, quantile.probit$ub) quantile.logit <- quantile.orm(orm.logit, new.data, probs=0.5, se=TRUE) quantile.logit <- cbind(quantile.logit$quantile, quantile.logit$lb, quantile.logit$ub) quantile.loglog <- quantile.orm(orm.loglog, new.data,probs=0.5, se=TRUE) quantile.loglog <- cbind(quantile.loglog$quantile, quantile.loglog$lb, quantile.loglog$ub) quantile.cloglog <- quantile.orm(orm.cloglog, new.data, probs=0.5,se=TRUE) quantile.cloglog <- cbind(quantile.cloglog$quantile, quantile.cloglog$lb, quantile.cloglog$ub) quantile.rq <- predict(rq.mod, new.data, interval = "confidence", conf.int=0.95) probit.00[i,] <- quantile.probit[1,] probit.10[i,] <- quantile.probit[2,] probit.01[i,] <- quantile.probit[3,] probit.11[i,] <- quantile.probit[4,] logit.00[i,] <- quantile.logit[1,] logit.10[i,] <- quantile.logit[2,] logit.01[i,] <- quantile.logit[3,] logit.11[i,] <- quantile.logit[4,] loglog.00[i,] <- quantile.loglog[1,] loglog.10[i,] <- quantile.loglog[2,] loglog.01[i,] <- quantile.loglog[3,] loglog.11[i,] <- quantile.loglog[4,] cloglog.00[i,] <- quantile.cloglog[1,] cloglog.10[i,] <- quantile.cloglog[2,] cloglog.01[i,] <- quantile.cloglog[3,] cloglog.11[i,] <- quantile.cloglog[4,] rq.00[i,] <- quantile.rq[1,] rq.10[i,] <- quantile.rq[2,] rq.01[i,] <- quantile.rq[3,] rq.11[i,] <- quantile.rq[4,] }) } return(list(probit.00=probit.00, probit.10=probit.10, probit.01=probit.01, probit.11=probit.11, logit.00=logit.00, logit.10=logit.10, logit.01=logit.01, logit.11=logit.11, loglog.00=loglog.00, loglog.10=loglog.10, loglog.01=loglog.01, loglog.11=loglog.11, cloglog.00=cloglog.00, cloglog.10=cloglog.10, cloglog.01=cloglog.01, cloglog.11=cloglog.11, rq.00=rq.00, rq.10=rq.10, rq.01=rq.01, rq.11=rq.11 )) } sim4_median.fun.d <- function(sim=100,seed=1, n=50, p=0.5, alpha=0, beta=c(1,-0.5),new.data=data.frame(z1=c(0,1, 0, 1), z2=c(0, 0, 1, 1))){ set.seed(seed) seeds <- unique(round(runif(sim*10,0,10)*sim,0))[seq(sim)] probit.00 <- matrix(NA, ncol=3, nrow=sim) probit.10 <- matrix(NA, ncol=3, nrow=sim) probit.01 <- matrix(NA, ncol=3, nrow=sim) probit.11 <- matrix(NA, ncol=3, nrow=sim) colnames(probit.00) <- c("est", "lb", "ub") logit.00 <- matrix(NA, ncol=3, nrow=sim) logit.10 <- matrix(NA, ncol=3, nrow=sim) logit.01 <- matrix(NA, ncol=3, nrow=sim) logit.11 <- matrix(NA, ncol=3, nrow=sim) loglog.00 <- matrix(NA, ncol=3, nrow=sim) loglog.10 <- matrix(NA, ncol=3, nrow=sim) loglog.01 <- matrix(NA, ncol=3, nrow=sim) loglog.11 <- matrix(NA, ncol=3, nrow=sim) cloglog.00 <- matrix(NA, ncol=3, nrow=sim) cloglog.10 <- matrix(NA, ncol=3, nrow=sim) cloglog.01 <- matrix(NA, ncol=3, nrow=sim) cloglog.11 <- matrix(NA, ncol=3, nrow=sim) rq.00 <- matrix(NA, ncol=3, nrow=sim) rq.10 <- matrix(NA, ncol=3, nrow=sim) rq.01 <- matrix(NA, ncol=3, nrow=sim) rq.11 <- matrix(NA, ncol=3, nrow=sim) for(i in 1:sim){ try({ data <- generate.data.d(seed=seeds[i], n=n,p=p, alpha=alpha, beta=beta) orm.probit <- orm(y~z1+z2, data=data, family=probit, x=TRUE, y=TRUE) orm.logit <- orm(y~z1+z2, data=data, family=logistic, x=TRUE, y=TRUE) orm.loglog <- orm(y~z1+z2, data=data, family=loglog, x=TRUE, y=TRUE) orm.cloglog <- orm(y~z1+z2, data=data, family=cloglog, x=TRUE, y=TRUE) rq.mod <- rq(y~ z1+z2, data=data) quantile.probit <- quantile.orm(orm.probit, new.data,probs=0.5, se=TRUE) quantile.probit <- cbind(quantile.probit$quantile, quantile.probit$lb, quantile.probit$ub) quantile.logit <- quantile.orm(orm.logit, new.data, probs=0.5, se=TRUE) quantile.logit <- cbind(quantile.logit$quantile, quantile.logit$lb, quantile.logit$ub) quantile.loglog <- quantile.orm(orm.loglog, new.data,probs=0.5, se=TRUE) quantile.loglog <- cbind(quantile.loglog$quantile, quantile.loglog$lb, quantile.loglog$ub) quantile.cloglog <- quantile.orm(orm.cloglog, new.data, probs=0.5,se=TRUE) quantile.cloglog <- cbind(quantile.cloglog$quantile, quantile.cloglog$lb, quantile.cloglog$ub) quantile.rq <- predict(rq.mod, new.data, interval = "confidence", conf.int=0.95) probit.00[i,] <- quantile.probit[1,] probit.10[i,] <- quantile.probit[2,] probit.01[i,] <- quantile.probit[3,] probit.11[i,] <- quantile.probit[4,] logit.00[i,] <- quantile.logit[1,] logit.10[i,] <- quantile.logit[2,] logit.01[i,] <- quantile.logit[3,] logit.11[i,] <- quantile.logit[4,] loglog.00[i,] <- quantile.loglog[1,] loglog.10[i,] <- quantile.loglog[2,] loglog.01[i,] <- quantile.loglog[3,] loglog.11[i,] <- quantile.loglog[4,] cloglog.00[i,] <- quantile.cloglog[1,] cloglog.10[i,] <- quantile.cloglog[2,] cloglog.01[i,] <- quantile.cloglog[3,] cloglog.11[i,] <- quantile.cloglog[4,] rq.00[i,] <- quantile.rq[1,] rq.10[i,] <- quantile.rq[2,] rq.01[i,] <- quantile.rq[3,] rq.11[i,] <- quantile.rq[4,] }) } return(list(probit.00=probit.00, probit.10=probit.10, probit.01=probit.01, probit.11=probit.11, logit.00=logit.00, logit.10=logit.10, logit.01=logit.01, logit.11=logit.11, loglog.00=loglog.00, loglog.10=loglog.10, loglog.01=loglog.01, loglog.11=loglog.11, cloglog.00=cloglog.00, cloglog.10=cloglog.10, cloglog.01=cloglog.01, cloglog.11=cloglog.11, rq.00=rq.00, rq.10=rq.10, rq.01=rq.01, rq.11=rq.11 )) } sim4_median.fun.e <- function(sim=100,seed=1, n=50, p=0.5, alpha=0, beta=c(1,-0.5),new.data=data.frame(z1=c(0,1, 0, 1), z2=c(0, 0, 1, 1))){ set.seed(seed) seeds <- unique(round(runif(sim*10,0,10)*sim,0))[seq(sim)] probit.00 <- matrix(NA, ncol=3, nrow=sim) probit.10 <- matrix(NA, ncol=3, nrow=sim) probit.01 <- matrix(NA, ncol=3, nrow=sim) probit.11 <- matrix(NA, ncol=3, nrow=sim) colnames(probit.00) <- c("est", "lb", "ub") logit.00 <- matrix(NA, ncol=3, nrow=sim) logit.10 <- matrix(NA, ncol=3, nrow=sim) logit.01 <- matrix(NA, ncol=3, nrow=sim) logit.11 <- matrix(NA, ncol=3, nrow=sim) loglog.00 <- matrix(NA, ncol=3, nrow=sim) loglog.10 <- matrix(NA, ncol=3, nrow=sim) loglog.01 <- matrix(NA, ncol=3, nrow=sim) loglog.11 <- matrix(NA, ncol=3, nrow=sim) cloglog.00 <- matrix(NA, ncol=3, nrow=sim) cloglog.10 <- matrix(NA, ncol=3, nrow=sim) cloglog.01 <- matrix(NA, ncol=3, nrow=sim) cloglog.11 <- matrix(NA, ncol=3, nrow=sim) rq.00 <- matrix(NA, ncol=3, nrow=sim) rq.10 <- matrix(NA, ncol=3, nrow=sim) rq.01 <- matrix(NA, ncol=3, nrow=sim) rq.11 <- matrix(NA, ncol=3, nrow=sim) for(i in 1:sim){ try({ data <- generate.data.e(seed=seeds[i], n=n,p=p, alpha=alpha, beta=beta) orm.probit <- orm(y~z1+z2, data=data, family=probit, x=TRUE, y=TRUE) orm.logit <- orm(y~z1+z2, data=data, family=logistic, x=TRUE, y=TRUE) orm.loglog <- orm(y~z1+z2, data=data, family=loglog, x=TRUE, y=TRUE) orm.cloglog <- orm(y~z1+z2, data=data, family=cloglog, x=TRUE, y=TRUE) rq.mod <- rq(y~ z1+z2, data=data) quantile.probit <- quantile.orm(orm.probit, new.data,probs=0.5, se=TRUE) quantile.probit <- cbind(quantile.probit$quantile, quantile.probit$lb, quantile.probit$ub) quantile.logit <- quantile.orm(orm.logit, new.data, probs=0.5, se=TRUE) quantile.logit <- cbind(quantile.logit$quantile, quantile.logit$lb, quantile.logit$ub) quantile.loglog <- quantile.orm(orm.loglog, new.data,probs=0.5, se=TRUE) quantile.loglog <- cbind(quantile.loglog$quantile, quantile.loglog$lb, quantile.loglog$ub) quantile.cloglog <- quantile.orm(orm.cloglog, new.data, probs=0.5,se=TRUE) quantile.cloglog <- cbind(quantile.cloglog$quantile, quantile.cloglog$lb, quantile.cloglog$ub) quantile.rq <- predict(rq.mod, new.data, interval = "confidence", conf.int=0.95) probit.00[i,] <- quantile.probit[1,] probit.10[i,] <- quantile.probit[2,] probit.01[i,] <- quantile.probit[3,] probit.11[i,] <- quantile.probit[4,] logit.00[i,] <- quantile.logit[1,] logit.10[i,] <- quantile.logit[2,] logit.01[i,] <- quantile.logit[3,] logit.11[i,] <- quantile.logit[4,] loglog.00[i,] <- quantile.loglog[1,] loglog.10[i,] <- quantile.loglog[2,] loglog.01[i,] <- quantile.loglog[3,] loglog.11[i,] <- quantile.loglog[4,] cloglog.00[i,] <- quantile.cloglog[1,] cloglog.10[i,] <- quantile.cloglog[2,] cloglog.01[i,] <- quantile.cloglog[3,] cloglog.11[i,] <- quantile.cloglog[4,] rq.00[i,] <- quantile.rq[1,] rq.10[i,] <- quantile.rq[2,] rq.01[i,] <- quantile.rq[3,] rq.11[i,] <- quantile.rq[4,] }) } return(list(probit.00=probit.00, probit.10=probit.10, probit.01=probit.01, probit.11=probit.11, logit.00=logit.00, logit.10=logit.10, logit.01=logit.01, logit.11=logit.11, loglog.00=loglog.00, loglog.10=loglog.10, loglog.01=loglog.01, loglog.11=loglog.11, cloglog.00=cloglog.00, cloglog.10=cloglog.10, cloglog.01=cloglog.01, cloglog.11=cloglog.11, rq.00=rq.00, rq.10=rq.10, rq.01=rq.01, rq.11=rq.11 )) } sim4_median.fun.f <- function(sim=100,seed=1, n=50, p=0.5, alpha=0, beta=c(1,-0.5),new.data=data.frame(z1=c(0,1, 0, 1), z2=c(0, 0, 1, 1))){ set.seed(seed) seeds <- unique(round(runif(sim*10,0,10)*sim,0))[seq(sim)] probit.00 <- matrix(NA, ncol=3, nrow=sim) probit.10 <- matrix(NA, ncol=3, nrow=sim) probit.01 <- matrix(NA, ncol=3, nrow=sim) probit.11 <- matrix(NA, ncol=3, nrow=sim) colnames(probit.00) <- c("est", "lb", "ub") logit.00 <- matrix(NA, ncol=3, nrow=sim) logit.10 <- matrix(NA, ncol=3, nrow=sim) logit.01 <- matrix(NA, ncol=3, nrow=sim) logit.11 <- matrix(NA, ncol=3, nrow=sim) loglog.00 <- matrix(NA, ncol=3, nrow=sim) loglog.10 <- matrix(NA, ncol=3, nrow=sim) loglog.01 <- matrix(NA, ncol=3, nrow=sim) loglog.11 <- matrix(NA, ncol=3, nrow=sim) cloglog.00 <- matrix(NA, ncol=3, nrow=sim) cloglog.10 <- matrix(NA, ncol=3, nrow=sim) cloglog.01 <- matrix(NA, ncol=3, nrow=sim) cloglog.11 <- matrix(NA, ncol=3, nrow=sim) rq.00 <- matrix(NA, ncol=3, nrow=sim) rq.10 <- matrix(NA, ncol=3, nrow=sim) rq.01 <- matrix(NA, ncol=3, nrow=sim) rq.11 <- matrix(NA, ncol=3, nrow=sim) for(i in 1:sim){ try({ data <- generate.data.f(seed=seeds[i], n=n,p=p, alpha=alpha, beta=beta) orm.probit <- orm(y~z1+z2, data=data, family=probit, x=TRUE, y=TRUE) orm.logit <- orm(y~z1+z2, data=data, family=logistic, x=TRUE, y=TRUE) orm.loglog <- orm(y~z1+z2, data=data, family=loglog, x=TRUE, y=TRUE) orm.cloglog <- orm(y~z1+z2, data=data, family=cloglog, x=TRUE, y=TRUE) rq.mod <- rq(y~ z1+z2, data=data) quantile.probit <- quantile.orm(orm.probit, new.data,probs=0.5, se=TRUE) quantile.probit <- cbind(quantile.probit$quantile, quantile.probit$lb, quantile.probit$ub) quantile.logit <- quantile.orm(orm.logit, new.data, probs=0.5, se=TRUE) quantile.logit <- cbind(quantile.logit$quantile, quantile.logit$lb, quantile.logit$ub) quantile.loglog <- quantile.orm(orm.loglog, new.data,probs=0.5, se=TRUE) quantile.loglog <- cbind(quantile.loglog$quantile, quantile.loglog$lb, quantile.loglog$ub) quantile.cloglog <- quantile.orm(orm.cloglog, new.data, probs=0.5,se=TRUE) quantile.cloglog <- cbind(quantile.cloglog$quantile, quantile.cloglog$lb, quantile.cloglog$ub) quantile.rq <- predict(rq.mod, new.data, interval = "confidence", conf.int=0.95) probit.00[i,] <- quantile.probit[1,] probit.10[i,] <- quantile.probit[2,] probit.01[i,] <- quantile.probit[3,] probit.11[i,] <- quantile.probit[4,] logit.00[i,] <- quantile.logit[1,] logit.10[i,] <- quantile.logit[2,] logit.01[i,] <- quantile.logit[3,] logit.11[i,] <- quantile.logit[4,] loglog.00[i,] <- quantile.loglog[1,] loglog.10[i,] <- quantile.loglog[2,] loglog.01[i,] <- quantile.loglog[3,] loglog.11[i,] <- quantile.loglog[4,] cloglog.00[i,] <- quantile.cloglog[1,] cloglog.10[i,] <- quantile.cloglog[2,] cloglog.01[i,] <- quantile.cloglog[3,] cloglog.11[i,] <- quantile.cloglog[4,] rq.00[i,] <- quantile.rq[1,] rq.10[i,] <- quantile.rq[2,] rq.01[i,] <- quantile.rq[3,] rq.11[i,] <- quantile.rq[4,] }) } return(list(probit.00=probit.00, probit.10=probit.10, probit.01=probit.01, probit.11=probit.11, logit.00=logit.00, logit.10=logit.10, logit.01=logit.01, logit.11=logit.11, loglog.00=loglog.00, loglog.10=loglog.10, loglog.01=loglog.01, loglog.11=loglog.11, cloglog.00=cloglog.00, cloglog.10=cloglog.10, cloglog.01=cloglog.01, cloglog.11=cloglog.11, rq.00=rq.00, rq.10=rq.10, rq.01=rq.01, rq.11=rq.11 )) } sim4_median.fun.g <- function(sim=100,seed=1, n=50, p=0.5, alpha=0, beta=c(1,-0.5),new.data=data.frame(z1=c(0,1, 0, 1), z2=c(0, 0, 1, 1))){ set.seed(seed) seeds <- unique(round(runif(sim*10,0,10)*sim,0))[seq(sim)] probit.00 <- matrix(NA, ncol=3, nrow=sim) probit.10 <- matrix(NA, ncol=3, nrow=sim) probit.01 <- matrix(NA, ncol=3, nrow=sim) probit.11 <- matrix(NA, ncol=3, nrow=sim) colnames(probit.00) <- c("est", "lb", "ub") logit.00 <- matrix(NA, ncol=3, nrow=sim) logit.10 <- matrix(NA, ncol=3, nrow=sim) logit.01 <- matrix(NA, ncol=3, nrow=sim) logit.11 <- matrix(NA, ncol=3, nrow=sim) loglog.00 <- matrix(NA, ncol=3, nrow=sim) loglog.10 <- matrix(NA, ncol=3, nrow=sim) loglog.01 <- matrix(NA, ncol=3, nrow=sim) loglog.11 <- matrix(NA, ncol=3, nrow=sim) cloglog.00 <- matrix(NA, ncol=3, nrow=sim) cloglog.10 <- matrix(NA, ncol=3, nrow=sim) cloglog.01 <- matrix(NA, ncol=3, nrow=sim) cloglog.11 <- matrix(NA, ncol=3, nrow=sim) rq.00 <- matrix(NA, ncol=3, nrow=sim) rq.10 <- matrix(NA, ncol=3, nrow=sim) rq.01 <- matrix(NA, ncol=3, nrow=sim) rq.11 <- matrix(NA, ncol=3, nrow=sim) for(i in 1:sim){ try({ data <- generate.data.g(seed=seeds[i], n=n,p=p, alpha=alpha, beta=beta) orm.probit <- orm(y~z1+z2, data=data, family=probit, x=TRUE, y=TRUE) orm.logit <- orm(y~z1+z2, data=data, family=logistic, x=TRUE, y=TRUE) orm.loglog <- orm(y~z1+z2, data=data, family=loglog, x=TRUE, y=TRUE) orm.cloglog <- orm(y~z1+z2, data=data, family=cloglog, x=TRUE, y=TRUE) rq.mod <- rq(y~ z1+z2, data=data) quantile.probit <- quantile.orm(orm.probit, new.data,probs=0.5, se=TRUE) quantile.probit <- cbind(quantile.probit$quantile, quantile.probit$lb, quantile.probit$ub) quantile.logit <- quantile.orm(orm.logit, new.data, probs=0.5, se=TRUE) quantile.logit <- cbind(quantile.logit$quantile, quantile.logit$lb, quantile.logit$ub) quantile.loglog <- quantile.orm(orm.loglog, new.data,probs=0.5, se=TRUE) quantile.loglog <- cbind(quantile.loglog$quantile, quantile.loglog$lb, quantile.loglog$ub) quantile.cloglog <- quantile.orm(orm.cloglog, new.data, probs=0.5,se=TRUE) quantile.cloglog <- cbind(quantile.cloglog$quantile, quantile.cloglog$lb, quantile.cloglog$ub) quantile.rq <- predict(rq.mod, new.data, interval = "confidence", conf.int=0.95) probit.00[i,] <- quantile.probit[1,] probit.10[i,] <- quantile.probit[2,] probit.01[i,] <- quantile.probit[3,] probit.11[i,] <- quantile.probit[4,] logit.00[i,] <- quantile.logit[1,] logit.10[i,] <- quantile.logit[2,] logit.01[i,] <- quantile.logit[3,] logit.11[i,] <- quantile.logit[4,] loglog.00[i,] <- quantile.loglog[1,] loglog.10[i,] <- quantile.loglog[2,] loglog.01[i,] <- quantile.loglog[3,] loglog.11[i,] <- quantile.loglog[4,] cloglog.00[i,] <- quantile.cloglog[1,] cloglog.10[i,] <- quantile.cloglog[2,] cloglog.01[i,] <- quantile.cloglog[3,] cloglog.11[i,] <- quantile.cloglog[4,] rq.00[i,] <- quantile.rq[1,] rq.10[i,] <- quantile.rq[2,] rq.01[i,] <- quantile.rq[3,] rq.11[i,] <- quantile.rq[4,] }) } return(list(probit.00=probit.00, probit.10=probit.10, probit.01=probit.01, probit.11=probit.11, logit.00=logit.00, logit.10=logit.10, logit.01=logit.01, logit.11=logit.11, loglog.00=loglog.00, loglog.10=loglog.10, loglog.01=loglog.01, loglog.11=loglog.11, cloglog.00=cloglog.00, cloglog.10=cloglog.10, cloglog.01=cloglog.01, cloglog.11=cloglog.11, rq.00=rq.00, rq.10=rq.10, rq.01=rq.01, rq.11=rq.11 )) } sim4_median.fun.h <- function(sim=100,seed=1, n=50, p=0.5, alpha=0, beta=c(1,-0.5),new.data=data.frame(z1=c(0,1, 0, 1), z2=c(0, 0, 1, 1))){ set.seed(seed) seeds <- unique(round(runif(sim*10,0,10)*sim,0))[seq(sim)] probit.00 <- matrix(NA, ncol=3, nrow=sim) probit.10 <- matrix(NA, ncol=3, nrow=sim) probit.01 <- matrix(NA, ncol=3, nrow=sim) probit.11 <- matrix(NA, ncol=3, nrow=sim) colnames(probit.00) <- c("est", "lb", "ub") logit.00 <- matrix(NA, ncol=3, nrow=sim) logit.10 <- matrix(NA, ncol=3, nrow=sim) logit.01 <- matrix(NA, ncol=3, nrow=sim) logit.11 <- matrix(NA, ncol=3, nrow=sim) loglog.00 <- matrix(NA, ncol=3, nrow=sim) loglog.10 <- matrix(NA, ncol=3, nrow=sim) loglog.01 <- matrix(NA, ncol=3, nrow=sim) loglog.11 <- matrix(NA, ncol=3, nrow=sim) cloglog.00 <- matrix(NA, ncol=3, nrow=sim) cloglog.10 <- matrix(NA, ncol=3, nrow=sim) cloglog.01 <- matrix(NA, ncol=3, nrow=sim) cloglog.11 <- matrix(NA, ncol=3, nrow=sim) rq.00 <- matrix(NA, ncol=3, nrow=sim) rq.10 <- matrix(NA, ncol=3, nrow=sim) rq.01 <- matrix(NA, ncol=3, nrow=sim) rq.11 <- matrix(NA, ncol=3, nrow=sim) for(i in 1:sim){ try({ data <- generate.data.h(seed=seeds[i], n=n,p=p, alpha=alpha, beta=beta) orm.probit <- orm(y~z1+z2, data=data, family=probit, x=TRUE, y=TRUE) orm.logit <- orm(y~z1+z2, data=data, family=logistic, x=TRUE, y=TRUE) orm.loglog <- orm(y~z1+z2, data=data, family=loglog, x=TRUE, y=TRUE) orm.cloglog <- orm(y~z1+z2, data=data, family=cloglog, x=TRUE, y=TRUE) rq.mod <- rq(y~ z1+z2, data=data) quantile.probit <- quantile.orm(orm.probit, new.data,probs=0.5, se=TRUE) quantile.probit <- cbind(quantile.probit$quantile, quantile.probit$lb, quantile.probit$ub) quantile.logit <- quantile.orm(orm.logit, new.data, probs=0.5, se=TRUE) quantile.logit <- cbind(quantile.logit$quantile, quantile.logit$lb, quantile.logit$ub) quantile.loglog <- quantile.orm(orm.loglog, new.data,probs=0.5, se=TRUE) quantile.loglog <- cbind(quantile.loglog$quantile, quantile.loglog$lb, quantile.loglog$ub) quantile.cloglog <- quantile.orm(orm.cloglog, new.data, probs=0.5,se=TRUE) quantile.cloglog <- cbind(quantile.cloglog$quantile, quantile.cloglog$lb, quantile.cloglog$ub) quantile.rq <- predict(rq.mod, new.data, interval = "confidence", conf.int=0.95) probit.00[i,] <- quantile.probit[1,] probit.10[i,] <- quantile.probit[2,] probit.01[i,] <- quantile.probit[3,] probit.11[i,] <- quantile.probit[4,] logit.00[i,] <- quantile.logit[1,] logit.10[i,] <- quantile.logit[2,] logit.01[i,] <- quantile.logit[3,] logit.11[i,] <- quantile.logit[4,] loglog.00[i,] <- quantile.loglog[1,] loglog.10[i,] <- quantile.loglog[2,] loglog.01[i,] <- quantile.loglog[3,] loglog.11[i,] <- quantile.loglog[4,] cloglog.00[i,] <- quantile.cloglog[1,] cloglog.10[i,] <- quantile.cloglog[2,] cloglog.01[i,] <- quantile.cloglog[3,] cloglog.11[i,] <- quantile.cloglog[4,] rq.00[i,] <- quantile.rq[1,] rq.10[i,] <- quantile.rq[2,] rq.01[i,] <- quantile.rq[3,] rq.11[i,] <- quantile.rq[4,] }) } return(list(probit.00=probit.00, probit.10=probit.10, probit.01=probit.01, probit.11=probit.11, logit.00=logit.00, logit.10=logit.10, logit.01=logit.01, logit.11=logit.11, loglog.00=loglog.00, loglog.10=loglog.10, loglog.01=loglog.01, loglog.11=loglog.11, cloglog.00=cloglog.00, cloglog.10=cloglog.10, cloglog.01=cloglog.01, cloglog.11=cloglog.11, rq.00=rq.00, rq.10=rq.10, rq.01=rq.01, rq.11=rq.11 )) } seed=1 p <- 0.5 alpha <- 0 beta <- c(1, -0.5) sigma <- 1 sim=10^4 #sim=1 sim4_median.a.n50 <-sim4_median.fun.a(sim=sim, n=50) sim4_median.b.n50 <-sim4_median.fun.b(sim=sim, n=50) sim4_median.c.n50 <-sim4_median.fun.c(sim=sim, n=50) sim4_median.d.n50 <-sim4_median.fun.d(sim=sim, n=50) sim4_median.e.n50 <-sim4_median.fun.e(sim=sim, n=50) sim4_median.f.n50 <-sim4_median.fun.f(sim=sim, n=50) sim4_median.g.n50 <-sim4_median.fun.g(sim=sim, n=50) sim4_median.h.n50 <-sim4_median.fun.h(sim=sim, n=50) sim4_median.a.n100 <-sim4_median.fun.a(sim=sim, n=100) sim4_median.b.n100 <-sim4_median.fun.b(sim=sim, n=100) sim4_median.c.n100 <-sim4_median.fun.c(sim=sim, n=100) sim4_median.d.n100 <-sim4_median.fun.d(sim=sim, n=100) sim4_median.e.n100 <-sim4_median.fun.e(sim=sim, n=100) sim4_median.f.n100 <-sim4_median.fun.f(sim=sim, n=100) sim4_median.g.n100 <-sim4_median.fun.g(sim=sim, n=100) sim4_median.h.n100 <-sim4_median.fun.h(sim=sim, n=100) sim4_median.a.n200 <-sim4_median.fun.a(sim=sim, n=200) sim4_median.b.n200 <-sim4_median.fun.b(sim=sim, n=200) sim4_median.c.n200 <-sim4_median.fun.c(sim=sim, n=200) sim4_median.d.n200 <-sim4_median.fun.d(sim=sim, n=200) sim4_median.e.n200 <-sim4_median.fun.e(sim=sim, n=200) sim4_median.f.n200 <-sim4_median.fun.f(sim=sim, n=200) sim4_median.g.n200 <-sim4_median.fun.g(sim=sim, n=200) sim4_median.h.n200 <-sim4_median.fun.h(sim=sim, n=200) sim4_median.summary <- function(true=as.matrix(new.data)%*%beta, result){ result.00.est <- c(mean(result$probit.00[,1], na.rm=TRUE), mean(result$logit.00[,1], na.rm=TRUE), mean(result$loglog.00[,1], na.rm=TRUE), mean(result$cloglog.00[,1], na.rm=TRUE)) result.00.coverage <- c(mean(result$probit.00[,2]<=true[1,1]&result$probit.00[,3]>=true[1,1], na.rm=TRUE), mean(result$logit.00[,2]<=true[1,1]&result$logit.00[,3]>=true[1,1], na.rm=TRUE), mean(result$loglog.00[,2]<=true[1,1]&result$loglog.00[,3]>=true[1,1], na.rm=TRUE), mean(result$cloglog.00[,2]<=true[1,1]&result$cloglog.00[,3]>=true[1,1], na.rm=TRUE)) result.00.mse <- c(mean((result$probit.00[,1]-true[1,1])^2, na.rm=TRUE), mean( (result$logit.00[,1]-true[1,1])^2, na.rm=TRUE), mean((result$loglog.00[,1]-true[1,1])^2, na.rm=TRUE), mean((result$cloglog.00[,1]-true[1,1])^2, na.rm=TRUE), mean((result$rq.00[,1]-true[1,1])^2, na.rm=TRUE) ) result.00.re <- (result.00.mse[5]/result.00.mse)[-5] result.00 <- cbind(format(round(result.00.est,2), nsmall=2), format(round(result.00.coverage, 3), nsmall=3), format(round(result.00.re,3), nsmall=3)) result.00.out <- c(format(round(true[1,1],2),nsmall=0), result.00[1,], result.00[2,], result.00[3,], result.00[4,]) ######## 10 result.10.est <- c(mean(result$probit.10[,1], na.rm=TRUE), mean(result$logit.10[,1], na.rm=TRUE), mean(result$loglog.10[,1], na.rm=TRUE), mean(result$cloglog.10[,1], na.rm=TRUE)) result.10.coverage <- c(mean(result$probit.10[,2]<=true[2,1]&result$probit.10[,3]>=true[2,1], na.rm=TRUE), mean(result$logit.10[,2]<=true[2,1]&result$logit.10[,3]>=true[2,1], na.rm=TRUE), mean(result$loglog.10[,2]<=true[2,1]&result$loglog.10[,3]>=true[2,1], na.rm=TRUE), mean(result$cloglog.10[,2]<=true[2,1]&result$cloglog.10[,3]>=true[2,1], na.rm=TRUE)) result.10.mse <- c(mean((result$probit.10[,1]-true[2,1])^2, na.rm=TRUE), mean( (result$logit.10[,1]-true[2,1])^2, na.rm=TRUE), mean((result$loglog.10[,1]-true[2,1])^2, na.rm=TRUE), mean((result$cloglog.10[,1]-true[2,1])^2, na.rm=TRUE), mean((result$rq.10[,1]-true[2,1])^2, na.rm=TRUE) ) result.10.re <- (result.10.mse[5]/result.10.mse)[-5] result.10 <- cbind(format(round(result.10.est,2), nsmall=2), format(round(result.10.coverage, 3), nsmall=3), format(round(result.10.re,3), nsmall=3)) result.10.out <- c(format(round(true[2,1],2),nsmall=0), result.10[1,], result.10[2,], result.10[3,], result.10[4,]) ###### 01 result.01.est <- c(mean(result$probit.01[,1], na.rm=TRUE), mean(result$logit.01[,1], na.rm=TRUE), mean(result$loglog.01[,1], na.rm=TRUE), mean(result$cloglog.01[,1], na.rm=TRUE)) result.01.coverage <- c(mean(result$probit.01[,2]<=true[3,1]&result$probit.01[,3]>=true[3,1], na.rm=TRUE), mean(result$logit.01[,2]<=true[3,1]&result$logit.01[,3]>=true[3,1], na.rm=TRUE), mean(result$loglog.01[,2]<=true[3,1]&result$loglog.01[,3]>=true[3,1], na.rm=TRUE), mean(result$cloglog.01[,2]<=true[3,1]&result$cloglog.01[,3]>=true[3,1], na.rm=TRUE)) result.01.mse <- c(mean((result$probit.01[,1]-true[3,1])^2, na.rm=TRUE), mean( (result$logit.01[,1]-true[3,1])^2, na.rm=TRUE), mean((result$loglog.01[,1]-true[3,1])^2, na.rm=TRUE), mean((result$cloglog.01[,1]-true[3,1])^2, na.rm=TRUE), mean((result$rq.01[,1]-true[3,1])^2, na.rm=TRUE) ) result.01.re <- (result.01.mse[5]/result.01.mse)[-5] result.01 <- cbind(format(round(result.01.est,2), nsmall=2), format(round(result.01.coverage, 3), nsmall=3), format(round(result.01.re,3), nsmall=3)) result.01.out <- c(format(round(true[3,1],2),nsmall=0), result.01[1,], result.01[2,], result.01[3,], result.01[4,]) ###### 11 result.11.est <- c(mean(result$probit.11[,1], na.rm=TRUE), mean(result$logit.11[,1], na.rm=TRUE), mean(result$loglog.11[,1], na.rm=TRUE), mean(result$cloglog.11[,1], na.rm=TRUE)) result.11.coverage <- c(mean(result$probit.11[,2]<=true[4,1]&result$probit.11[,3]>=true[4,1], na.rm=TRUE), mean(result$logit.11[,2]<=true[4,1]&result$logit.11[,3]>=true[4,1], na.rm=TRUE), mean(result$loglog.11[,2]<=true[4,1]&result$loglog.11[,3]>=true[4,1], na.rm=TRUE), mean(result$cloglog.11[,2]<=true[4,1]&result$cloglog.11[,3]>=true[4,1], na.rm=TRUE)) result.11.mse <- c(mean((result$probit.11[,1]-true[4,1])^2, na.rm=TRUE), mean( (result$logit.11[,1]-true[4,1])^2, na.rm=TRUE), mean((result$loglog.11[,1]-true[4,1])^2, na.rm=TRUE), mean((result$cloglog.11[,1]-true[4,1])^2, na.rm=TRUE), mean((result$rq.11[,1]-true[4,1])^2, na.rm=TRUE) ) result.11.re <- (result.11.mse[5]/result.11.mse)[-5] result.11 <- cbind(format(round(result.11.est,2), nsmall=2), format(round(result.11.coverage, 3), nsmall=3), format(round(result.11.re,3), nsmall=3)) result.11.out <- c(format(round(true[4,1],2),nsmall=0), result.11[1,], result.11[2,], result.11[3,], result.11[4,]) result.out <- rbind(result.00.out, result.10.out, result.01.out, result.11.out) return(result.out) } ############### Figure 10; Tables S.13 and S.14 ################### result.sim4_median.a.n100 <- sim4_median.summary(true=as.matrix(new.data)%*%beta, result=sim4_median.a.n100) result.sim4_median.b.n100 <- sim4_median.summary(true=as.matrix(new.data)%*%beta, result=sim4_median.b.n100) result.sim4_median.c.n100 <- sim4_median.summary(true=log(-log(0.5))+as.matrix(new.data)%*%beta, result=sim4_median.c.n100) result.sim4_median.d.n100 <- sim4_median.summary(true=-log(-log(0.5))+as.matrix(new.data)%*%beta, result=sim4_median.d.n100) result.sim4_median.e.n100 <- sim4_median.summary(true=as.matrix(new.data)%*%beta, result=sim4_median.e.n100) result.sim4_median.f.n100 <- sim4_median.summary(true=as.matrix(new.data)%*%beta, result=sim4_median.f.n100) result.sim4_median.g.n100 <- sim4_median.summary(true=(qbeta(0.5, 5,2) -5/7)*sqrt((5+2)^2*(5+2+1)/5/2)+as.matrix(new.data)%*%beta, result=sim4_median.g.n100) result.sim4_median.h.n100 <- sim4_median.summary(true=(qbeta(0.5, 2,5) -2/7)*sqrt((5+2)^2*(5+2+1)/5/2)+as.matrix(new.data)%*%beta, result=sim4_median.h.n100) sim4_median.out.n100 <- rbind(rep("", 13), result.sim4_median.a.n100, rep("", 13), rep("", 13), result.sim4_median.b.n100, rep("", 13), rep("", 13), result.sim4_median.c.n100, rep("", 13), rep("", 13), result.sim4_median.d.n100, rep("", 13), rep("", 13), result.sim4_median.e.n100, rep("", 13), rep("", 13), result.sim4_median.f.n100, rep("", 13), rep("", 13), result.sim4_median.g.n100, rep("", 13), rep("", 13), result.sim4_median.h.n100) ############### Tables S.8 and S.9 ################### result.sim4_median.a.n50 <- sim4_median.summary(true=as.matrix(new.data)%*%beta, result=sim4_median.a.n50) result.sim4_median.b.n50 <- sim4_median.summary(true=as.matrix(new.data)%*%beta, result=sim4_median.b.n50) result.sim4_median.c.n50 <- sim4_median.summary(true=log(-log(0.5))+as.matrix(new.data)%*%beta, result=sim4_median.c.n50) result.sim4_median.d.n50 <- sim4_median.summary(true=-log(-log(0.5))+as.matrix(new.data)%*%beta, result=sim4_median.d.n50) result.sim4_median.e.n50 <- sim4_median.summary(true=as.matrix(new.data)%*%beta, result=sim4_median.e.n50) result.sim4_median.f.n50 <- sim4_median.summary(true=as.matrix(new.data)%*%beta, result=sim4_median.f.n50) result.sim4_median.g.n50 <- sim4_median.summary(true=(qbeta(0.5, 5,2) -5/7)*sqrt((5+2)^2*(5+2+1)/5/2)+as.matrix(new.data)%*%beta, result=sim4_median.g.n50) result.sim4_median.h.n50 <- sim4_median.summary(true=(qbeta(0.5, 2,5) -2/7)*sqrt((5+2)^2*(5+2+1)/5/2)+as.matrix(new.data)%*%beta, result=sim4_median.h.n50) sim4_median.out.n50 <- rbind(rep("", 13), result.sim4_median.a.n50, rep("", 13), rep("", 13), result.sim4_median.b.n50, rep("", 13), rep("", 13), result.sim4_median.c.n50, rep("", 13), rep("", 13), result.sim4_median.d.n50, rep("", 13), rep("", 13), result.sim4_median.e.n50, rep("", 13), rep("", 13), result.sim4_median.f.n50, rep("", 13), rep("", 13), result.sim4_median.g.n50, rep("", 13), rep("", 13), result.sim4_median.h.n50) ############### Tables S.16 and S.17 ################### result.sim4_median.a.n200 <- sim4_median.summary(true=as.matrix(new.data)%*%beta, result=sim4_median.a.n200) result.sim4_median.b.n200 <- sim4_median.summary(true=as.matrix(new.data)%*%beta, result=sim4_median.b.n200) result.sim4_median.c.n200 <- sim4_median.summary(true=log(-log(0.5))+as.matrix(new.data)%*%beta, result=sim4_median.c.n200) result.sim4_median.d.n200 <- sim4_median.summary(true=-log(-log(0.5))+as.matrix(new.data)%*%beta, result=sim4_median.d.n200) result.sim4_median.e.n200 <- sim4_median.summary(true=as.matrix(new.data)%*%beta, result=sim4_median.e.n200) result.sim4_median.f.n200 <- sim4_median.summary(true=as.matrix(new.data)%*%beta, result=sim4_median.f.n200) result.sim4_median.g.n200 <- sim4_median.summary(true=(qbeta(0.5, 5,2) -5/7)*sqrt((5+2)^2*(5+2+1)/5/2)+as.matrix(new.data)%*%beta, result=sim4_median.g.n200) result.sim4_median.h.n200 <- sim4_median.summary(true=(qbeta(0.5, 2,5) -2/7)*sqrt((5+2)^2*(5+2+1)/5/2)+as.matrix(new.data)%*%beta, result=sim4_median.h.n200) sim4_median.out.n200 <- rbind(rep("", 13), result.sim4_median.a.n200, rep("", 13), rep("", 13), result.sim4_median.b.n200, rep("", 13), rep("", 13), result.sim4_median.c.n200, rep("", 13), rep("", 13), result.sim4_median.d.n200, rep("", 13), rep("", 13), result.sim4_median.e.n200, rep("", 13), rep("", 13), result.sim4_median.f.n200, rep("", 13), rep("", 13), result.sim4_median.g.n200, rep("", 13), rep("", 13), result.sim4_median.h.n200) ####### SIMULATIONS IN SUPPLEMENTAL MATERIALS ############# ###### Sim 3: orm with an automatic link function selection procedure library(rms) rGumbelMin<- function(n, mu=0, sigma=1){ u <- runif(n, min=0, max=1) x <- mu + sigma*log(-log(1-u)) return(x) } rGumbelMax<- function(n, mu=0, sigma=1){ u <- runif(n, min=0, max=1) x <- mu + sigma*(-log(-log(u))) return(x) } generate.data <- function(seed=1, n=100, family=c("probit", "logistic", "gumbelMin", "gumbelMax", "cauchy"), z.type=c("continuous", "binary"), p=0.5,sigma.z=1, alpha=0, beta=1, sigma=1){ set.seed(seed) if(z.type[1]=="continuous"){ z <- rnorm(n, 0, sigma.z) } else if (z.type=="binary"){ z <- sample(c(0,1),size=n, replace=TRUE, prob=c(1-p, p)) } else stop("Need to specify the covariate Z to be continuous or binary") if (family[1]=="probit"){ x <- rnorm(n, alpha+beta*z, sigma) } else if (family[1]=="logistic"){ x <- rlogis(n, alpha+beta*z, sigma) } else if (family[1]=="gumbelMin"){ x <- rGumbelMin(n, mu=alpha+beta*z, sigma=sigma) } else if (family[1]=="gumbelMax"){ x <- rGumbelMax(n, mu=alpha+beta*z, sigma=sigma) } else if(family[1]=="cauchy"){ x <- rcauchy(n, alpha+beta*z, sigma) } else x <- rep(NA, n) data <- data.frame(x=x, z=z) return(data) } orm.link.sim <- function(sim=100,seed=1, n=50, z.type=c("continuous", "binary"),family=c("probit", "logistic", "gumbelMin","gumbelMax", "cauchy"), p=0.5,sigma.z=1, alpha=0, beta=1, sigma=1){ set.seed(seed) seeds <- unique(round(runif(sim*10,0,10)*sim,0))[seq(sim)] LR.result <- matrix(NA, ncol=4, nrow=sim) p.result <- matrix(NA, ncol=4, nrow=sim) colnames(LR.result) <- c("probit","logit","loglog", "cloglog") colnames(p.result) <- c("probit","logit","loglog", "cloglog") for(i in 1:sim){ try({ data <- generate.data(seed=seeds[i], n=n, family=family, z.type=z.type, p=p,sigma.z=sigma.z, alpha=alpha, beta=beta, sigma=sigma) mod.probit <- orm(x ~z, data=data, family=probit) mod.logit <- orm(x ~z, data=data) mod.loglog <- orm(x~z, data=data, family=loglog) mod.cloglog <- orm(x~z, data=data, family=cloglog) LR.result[i, ] <- -c(mod.probit$deviance[2], mod.logit$deviance[2],mod.loglog$deviance[2], mod.cloglog$deviance[2]) p.result[i, ] <- c(anova(mod.probit)["z", "P"], anova(mod.logit)["z", "P"], anova(mod.loglog)["z", "P"], anova(mod.cloglog)["z", "P"]) }) } return(list(LR.result=LR.result, # beta.result=beta.result, #se.result=se.result, p.result=p.result)) } z.type <- "continuous" seed <- 1 sim <- 10^4 sigma.z <- 1 alpha <- 0 beta <- 0.1 sigma <- 1 ###### true error distribution: normal ################## probit.25.null <- orm.link.sim(sim=sim, seed=seed, n=25,family="probit", z.type=z.type,p=p, sigma.z=sigma.z, alpha=alpha, beta=0, sigma=sigma) probit.25.power <- orm.link.sim(sim=sim, seed=seed, n=25,family="probit", z.type=z.type,p=p, sigma.z=sigma.z, alpha=alpha, beta=0.1, sigma=sigma) probit.50.null <- orm.link.sim(sim=sim, seed=seed, n=50,family="probit", z.type=z.type,p=p, sigma.z=sigma.z, alpha=alpha, beta=0, sigma=sigma) probit.50.power <- orm.link.sim(sim=sim, seed=seed, n=50,family="probit", z.type=z.type,p=p, sigma.z=sigma.z, alpha=alpha, beta=0.1, sigma=sigma) probit.100.null <- orm.link.sim(sim=sim, seed=seed, n=100,family="probit", z.type=z.type,p=p, sigma.z=sigma.z, alpha=alpha, beta=0, sigma=sigma) probit.100.power <- orm.link.sim(sim=sim, seed=seed, n=100,family="probit", z.type=z.type,p=p, sigma.z=sigma.z, alpha=alpha, beta=0.1, sigma=sigma) probit.200.null <- orm.link.sim(sim=sim, seed=seed, n=200,family="probit", z.type=z.type,p=p, sigma.z=sigma.z, alpha=alpha, beta=0, sigma=sigma) probit.200.power <- orm.link.sim(sim=sim, seed=seed, n=200,family="probit", z.type=z.type,p=p, sigma.z=sigma.z, alpha=alpha, beta=0.1, sigma=sigma) probit.500.null <- orm.link.sim(sim=sim, seed=seed, n=500,family="probit", z.type=z.type,p=p, sigma.z=sigma.z, alpha=alpha, beta=0, sigma=sigma) probit.500.power <- orm.link.sim(sim=sim, seed=seed, n=500,family="probit", z.type=z.type,p=p, sigma.z=sigma.z, alpha=alpha, beta=0.1, sigma=sigma) probit.1000.null <- orm.link.sim(sim=sim, seed=seed, n=1000,family="probit", z.type=z.type,p=p, sigma.z=sigma.z, alpha=alpha, beta=0, sigma=sigma) probit.1000.power <- orm.link.sim(sim=sim, seed=seed, n=1000,family="probit", z.type=z.type,p=p, sigma.z=sigma.z, alpha=alpha, beta=0.1, sigma=sigma) ###### true error distribution: logistic ############## logit.25.null <- orm.link.sim(sim=sim, seed=seed, n=25,family ="logistic", z.type=z.type,p=p, sigma.z=sigma.z, alpha=alpha, beta=0, sigma=sigma) logit.25.power <- orm.link.sim(sim=sim, seed=seed, n=25,family ="logistic", z.type=z.type,p=p, sigma.z=sigma.z, alpha=alpha, beta=0.1, sigma=sigma) logit.50.null <- orm.link.sim(sim=sim, seed=seed, n=50,family ="logistic", z.type=z.type,p=p, sigma.z=sigma.z, alpha=alpha, beta=0, sigma=sigma) logit.50.power <- orm.link.sim(sim=sim, seed=seed, n=50,family ="logistic", z.type=z.type,p=p, sigma.z=sigma.z, alpha=alpha, beta=0.1, sigma=sigma) logit.100.null <- orm.link.sim(sim=sim, seed=seed, n=100,family ="logistic", z.type=z.type,p=p, sigma.z=sigma.z, alpha=alpha, beta=0, sigma=sigma) logit.100.power <- orm.link.sim(sim=sim, seed=seed, n=100,family ="logistic", z.type=z.type,p=p, sigma.z=sigma.z, alpha=alpha, beta=0.1, sigma=sigma) logit.200.null <- orm.link.sim(sim=sim, seed=seed, n=200,family ="logistic", z.type=z.type,p=p, sigma.z=sigma.z, alpha=alpha, beta=0, sigma=sigma) logit.200.power <- orm.link.sim(sim=sim, seed=seed, n=200,family ="logistic", z.type=z.type,p=p, sigma.z=sigma.z, alpha=alpha, beta=0.1, sigma=sigma) logit.500.null <- orm.link.sim(sim=sim, seed=seed, n=500,family ="logistic", z.type=z.type,p=p, sigma.z=sigma.z, alpha=alpha, beta=0, sigma=sigma) logit.500.power <- orm.link.sim(sim=sim, seed=seed, n=500,family ="logistic", z.type=z.type,p=p, sigma.z=sigma.z, alpha=alpha, beta=0.1, sigma=sigma) logit.1000.null <- orm.link.sim(sim=sim, seed=seed, n=1000,family ="logistic", z.type=z.type,p=p, sigma.z=sigma.z, alpha=alpha, beta=0, sigma=sigma) logit.1000.power <- orm.link.sim(sim=sim, seed=seed, n=1000,family ="logistic", z.type=z.type,p=p, sigma.z=sigma.z, alpha=alpha, beta=0.1, sigma=sigma) ###### true error distribution: extreme value (type I) ################## loglog.25.null <- orm.link.sim(sim=sim, seed=seed, n=25,family ="gumbelMin", z.type=z.type,p=p, sigma.z=sigma.z, alpha=alpha, beta=0, sigma=sigma) loglog.25.power <- orm.link.sim(sim=sim, seed=seed, n=25,family ="gumbelMin", z.type=z.type,p=p, sigma.z=sigma.z, alpha=alpha, beta=0.1, sigma=sigma) loglog.50.null <- orm.link.sim(sim=sim, seed=seed, n=50,family ="gumbelMin", z.type=z.type,p=p, sigma.z=sigma.z, alpha=alpha, beta=0, sigma=sigma) loglog.50.power <- orm.link.sim(sim=sim, seed=seed, n=50,family ="gumbelMin", z.type=z.type,p=p, sigma.z=sigma.z, alpha=alpha, beta=0.1, sigma=sigma) loglog.100.null <- orm.link.sim(sim=sim, seed=seed, n=100,family ="gumbelMin", z.type=z.type,p=p, sigma.z=sigma.z, alpha=alpha, beta=0, sigma=sigma) loglog.100.power <- orm.link.sim(sim=sim, seed=seed, n=100,family ="gumbelMin", z.type=z.type,p=p, sigma.z=sigma.z, alpha=alpha, beta=0.1, sigma=sigma) loglog.200.null <- orm.link.sim(sim=sim, seed=seed, n=200,family ="gumbelMin", z.type=z.type,p=p, sigma.z=sigma.z, alpha=alpha, beta=0, sigma=sigma) loglog.200.power <- orm.link.sim(sim=sim, seed=seed, n=200,family ="gumbelMin", z.type=z.type,p=p, sigma.z=sigma.z, alpha=alpha, beta=0.1, sigma=sigma) loglog.500.null <- orm.link.sim(sim=sim, seed=seed, n=500,family ="gumbelMin", z.type=z.type,p=p, sigma.z=sigma.z, alpha=alpha, beta=0, sigma=sigma) loglog.500.power <- orm.link.sim(sim=sim, seed=seed, n=500,family ="gumbelMin", z.type=z.type,p=p, sigma.z=sigma.z, alpha=alpha, beta=0.1, sigma=sigma) loglog.1000.null <- orm.link.sim(sim=sim, seed=seed, n=1000,family ="gumbelMin", z.type=z.type,p=p, sigma.z=sigma.z, alpha=alpha, beta=0, sigma=sigma) loglog.1000.power <- orm.link.sim(sim=sim, seed=seed, n=1000,family ="gumbelMin", z.type=z.type,p=p, sigma.z=sigma.z, alpha=alpha, beta=0.1, sigma=sigma) ###### true error distribution: extreme value (type II) ########## cloglog.25.null <- orm.link.sim(sim=sim, seed=seed, n=25,family = "gumbelMax", z.type=z.type,p=p, sigma.z=sigma.z, alpha=alpha, beta=0, sigma=sigma) cloglog.25.power <- orm.link.sim(sim=sim, seed=seed, n=25,family = "gumbelMax", z.type=z.type,p=p, sigma.z=sigma.z, alpha=alpha, beta=0.1, sigma=sigma) cloglog.50.null <- orm.link.sim(sim=sim, seed=seed, n=50,family = "gumbelMax", z.type=z.type,p=p, sigma.z=sigma.z, alpha=alpha, beta=0, sigma=sigma) cloglog.50.power <- orm.link.sim(sim=sim, seed=seed, n=50,family = "gumbelMax", z.type=z.type,p=p, sigma.z=sigma.z, alpha=alpha, beta=0.1, sigma=sigma) cloglog.100.null <- orm.link.sim(sim=sim, seed=seed, n=100,family = "gumbelMax", z.type=z.type,p=p, sigma.z=sigma.z, alpha=alpha, beta=0, sigma=sigma) cloglog.100.power <- orm.link.sim(sim=sim, seed=seed, n=100,family = "gumbelMax", z.type=z.type,p=p, sigma.z=sigma.z, alpha=alpha, beta=0.1, sigma=sigma) cloglog.200.null <- orm.link.sim(sim=sim, seed=seed, n=200,family = "gumbelMax", z.type=z.type,p=p, sigma.z=sigma.z, alpha=alpha, beta=0, sigma=sigma) cloglog.200.power <- orm.link.sim(sim=sim, seed=seed, n=200,family = "gumbelMax", z.type=z.type,p=p, sigma.z=sigma.z, alpha=alpha, beta=0.1, sigma=sigma) cloglog.500.null <- orm.link.sim(sim=sim, seed=seed, n=500,family = "gumbelMax", z.type=z.type,p=p, sigma.z=sigma.z, alpha=alpha, beta=0, sigma=sigma) cloglog.500.power <- orm.link.sim(sim=sim, seed=seed, n=500,family = "gumbelMax", z.type=z.type,p=p, sigma.z=sigma.z, alpha=alpha, beta=0.1, sigma=sigma) cloglog.1000.null <- orm.link.sim(sim=sim, seed=seed, n=1000,family = "gumbelMax", z.type=z.type,p=p, sigma.z=sigma.z, alpha=alpha, beta=0, sigma=sigma) cloglog.1000.power <- orm.link.sim(sim=sim, seed=seed, n=1000,family = "gumbelMax", z.type=z.type,p=p, sigma.z=sigma.z, alpha=alpha, beta=0.1, sigma=sigma) ####### PRINT OUT TABLE S.1 tab.s1.summary <- function(result){ power <- apply(result$p.result[, c(1,3,4)], 2, FUN=function(x) mean(x<0.05, na.rm=TRUE)) power <- format(round(power,3), nsmall=3) winner <- apply(result$LR.result[, c(1,3,4)], 1, FUN=function(x) which(x==max(x, na.rm=TRUE))[1]) winner.perc <- format(c(mean(winner==1, na.rm=TRUE), mean(winner==2, na.rm=TRUE), mean(winner==3, na.rm=TRUE))*100, nsmall=2) p.winner <- result$p.result[, c(1,3,4)][cbind(1:sim, winner)] power.winner <- mean(p.winner<0.05, na.rm=TRUE) winner.out <- paste(format(round(power.winner,3), nsmall=3), " (",winner.perc[1], "%, ", winner.perc[2], "%, ", winner.perc[3], "%)",sep="") out <- c(winner.out, power) names(out)[1] <- "link function selection" return(out) } probit.result <- rbind(tab.s1.summary(probit.25.null), tab.s1.summary(probit.25.power), tab.s1.summary(probit.50.null), tab.s1.summary(probit.50.power), tab.s1.summary(probit.100.null), tab.s1.summary(probit.100.power), tab.s1.summary(probit.200.null), tab.s1.summary(probit.200.power), tab.s1.summary(probit.500.null), tab.s1.summary(probit.500.power), tab.s1.summary(probit.1000.null), tab.s1.summary(probit.1000.power)) logit.result <- rbind(tab.s1.summary(logit.25.null), tab.s1.summary(logit.25.power), tab.s1.summary(logit.50.null), tab.s1.summary(logit.50.power), tab.s1.summary(logit.100.null), tab.s1.summary(logit.100.power), tab.s1.summary(logit.200.null), tab.s1.summary(logit.200.power), tab.s1.summary(logit.500.null), tab.s1.summary(logit.500.power), tab.s1.summary(logit.1000.null), tab.s1.summary(logit.1000.power)) loglog.result <- rbind(tab.1s.summary(loglog.25.null), tab.s1.summary(loglog.25.power), tab.s1.summary(loglog.50.null), tab.s1.summary(loglog.50.power), tab.s1.summary(loglog.100.null), tab.s1.summary(loglog.100.power), tab.s1.summary(loglog.200.null), tab.s1.summary(loglog.200.power), tab.s1.summary(loglog.500.null), tab.s1.summary(loglog.500.power), tab.s1.summary(loglog.1000.null), tab.s1.summary(loglog.1000.power)) cloglog.result <- rbind(tab.1s.summary(cloglog.25.null), tab.s1.summary(cloglog.25.power), tab.s1.summary(cloglog.50.null), tab.s1.summary(cloglog.50.power), tab.s1.summary(cloglog.100.null), tab.s1.summary(cloglog.100.power), tab.s1.summary(cloglog.200.null), tab.s1.summary(cloglog.200.power), tab.s1.summary(cloglog.500.null), tab.s1.summary(cloglog.500.power), tab.s1.summary(cloglog.1000.null), tab.s1.summary(cloglog.1000.power)) tab.s1.out <- rbind(rep("", 4), probit.result, rep("", 4), logit.result, rep("", 4), loglog.result, rep("", 4), cloglog.result ) rownames(tab.s1.out) <- c("Normal", "~ ~ ~ ~ $n=25$ ~ ~ $H_0$","~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ $H_1$","~ ~ ~ ~ $n=50$ ~ ~ $H_0$","~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ $H_1$", "~ ~ ~ ~ $n=100$ ~ ~$H_0$","~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ $H_1$", "~ ~ ~ ~ $n=200$ ~ ~$H_0$","~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ $H_1$","~ ~ ~ ~ $n=500$ ~ ~$H_0$","~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ $H_1$","~ ~ ~ ~ $n=1000$ ~$H_0$","~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ $H_1$", "Logistic", "~ ~ ~ ~ $n=25$ ~ ~ $H_0$","~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ $H_1$","~ ~ ~ ~ $n=50$ ~ ~ $H_0$","~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ $H_1$", "~ ~ ~ ~ $n=100$ ~ ~$H_0$","~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ $H_1$", "~ ~ ~ ~ $n=200$ ~ ~$H_0$","~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ $H_1$","~ ~ ~ ~ $n=500$ ~ ~$H_0$","~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ $H_1$","~ ~ ~ ~ $n=1000$ ~$H_0$","~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ $H_1$", "Extreme Type I", "~ ~ ~ ~ $n=25$ ~ ~ $H_0$","~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ $H_1$","~ ~ ~ ~ $n=50$ ~ ~ $H_0$","~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ $H_1$", "~ ~ ~ ~ $n=100$ ~ ~$H_0$","~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ $H_1$", "~ ~ ~ ~ $n=200$ ~ ~$H_0$","~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ $H_1$","~ ~ ~ ~ $n=500$ ~ ~$H_0$","~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ $H_1$","~ ~ ~ ~ $n=1000$ ~$H_0$","~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ $H_1$", "Extreme Type II", "~ ~ ~ ~ $n=25$ ~ ~ $H_0$","~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ $H_1$","~ ~ ~ ~ $n=50$ ~ ~ $H_0$","~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ $H_1$", "~ ~ ~ ~ $n=100$ ~ ~$H_0$","~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ $H_1$", "~ ~ ~ ~ $n=200$ ~ ~$H_0$","~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ $H_1$","~ ~ ~ ~ $n=500$ ~ ~$H_0$","~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ $H_1$","~ ~ ~ ~ $n=1000$ ~$H_0$","~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ $H_1$") latex(tab.s1.out, where="!htbp", file="",title="True error distribution",caption="link function selection", size="scriptsize") ######### sim 4: orm for outcomes subject to dection limit ########### generate.data <- function(seed=1, n=100, alpha=3, beta=0.25, sigma=1){ set.seed(seed) z <- rnorm(n, 0, 1) y <- rnorm(n, alpha+beta*z, sigma) data <- data.frame(z=z, y=y) return(data) } detection.limit.sim <- function(sim=1000,seed=1, n=100, alpha=3, beta=0.25, sigma=1, cut.off=3){ set.seed(seed) seeds <- unique(round(runif(sim*10,0,10)*sim,0))[seq(sim)] pval <- matrix(NA, ncol=5, nrow=sim) colnames(pval) <- c("orm (probit)","orm (logit)", "logistic", "ols 1", "ols 2") est <- matrix(NA, ncol=5, nrow=sim) colnames(est) <- c("orm (probit)","orm (logit)", "logistic", "ols 1", "ols 2") est.se <- matrix(NA, ncol=5, nrow=sim) colnames(est.se) <- c("orm (probit)","orm (logit)", "logistic", "ols 1", "ols 2") for(i in 1:sim){ try({ data <- generate.data(seed=seeds[i], n=n, alpha=alpha, beta=beta, sigma=sigma) data$y.d <- ifelse(data$y350, 1, 0) d$post.cd4.500 <- ifelse(d$post.cd4>500, 1, 0) #d$post.cd4.150 <- ifelse(d$post.cd4>150, 1, 0) ##### logistic regression models lrm.500 <- lrm(post.cd4.500 ~ site + male + rcs(age) + init.class + route + rcs(sqrt(nadir.cd4)) +rcs(init.year)+ rcs(log(pre.rna))+ preARTAIDS.clinical, data=d, x=TRUE) test <- predict(lrm.500, new.data.ols, se.fit=TRUE) est.cdf.glm.logit <- cbind(plogis(test$linear.predictors), plogis(with(test, linear.predictors + 1.96*cbind(-se.fit,se.fit)))) lrm.350 <- lrm(post.cd4.350 ~ site + male + rcs(age) + init.class + route + rcs(sqrt(nadir.cd4)) +rcs(init.year)+ rcs(log(pre.rna))+ preARTAIDS.clinical, data=d, x=TRUE) test <- predict(lrm.350, new.data.ols, se.fit=TRUE) est.cdf.glm.logit2 <- cbind(plogis(test$linear.predictors), plogis(with(test, linear.predictors + 1.96*cbind(-se.fit,se.fit)))) postscript("fig15-app_3.eps", height=4.5, width=8, horizontal = FALSE, onefile = FALSE, paper = "special") par(mfrow=c(2,4)) par(mar=c(2.5,2.5,0.5,0.5), mgp = c(1.5, 0.5, 0), oma=c(0.5,3,2,0.5)) plot(new.age, est.mean.orm.logit[,1], cex= 0.01, ylim=c(280, 410), ylab="CD4", xlab="age", type="n", main="") polygon(c(new.age, rev(new.age)), c(est.mean.boxcox[,2], rev(est.mean.boxcox[,3])), col =colors()[560], border = NA) polygon(c(new.age, rev(new.age)), c(est.mean.orm.logit[,3], rev(est.mean.orm.logit[,4])), col = "grey", border = NA) lines(new.age, est.mean.orm.logit[,1], cex=0.01) lines(new.age, est.mean.orm.logit[,3], cex=0.01, lty=2) lines(new.age, est.mean.orm.logit[,4], cex=0.01, lty=2) lines(new.age, est.mean.boxcox[,1], cex=0.01, col=2) lines(new.age, est.mean.boxcox[,2], cex=0.01, col=2, lty=2) lines(new.age, est.mean.boxcox[,3], cex=0.01, col=2, lty=2) legend("top", c("orm (logit)", "Box-Cox" ), col=c(1,2), lty=c(1,1),bty="n") plot(new.age, est.median.orm.logit$quantile, cex=0.01, ylim=c(280, 410), ylab="CD4", xlab="age", type="n") polygon(c(new.age, rev(new.age)), c(est.median.rq[,2], rev(est.median.rq[,3])),col =colors()[560], border = NA) polygon(c(new.age, rev(new.age)), c(est.median.orm.logit$lb, rev(est.median.orm.logit$ub)), col = 'grey', border = NA) lines(new.age, est.median.orm.logit$quantile, cex=0.01) lines(new.age, est.median.orm.logit$lb, cex=0.01, lty=2) lines(new.age, est.median.orm.logit$ub, cex=0.01, lty=2) lines(new.age, est.median.rq[,1], cex=0.01, col=2) lines(new.age, est.median.rq[,2], cex=0.01, col=2, lty=2) lines(new.age, est.median.rq[,3], cex=0.01, col=2, lty=2) legend("top", c("orm (logit)", "Median regression"), col=c(1,2), lty=c(1,1),bty="n") plot(new.age,1-est.cdf.orm.logit2$est[,1], cex=0.01, xlab="age", ylab="Probability", ylim=c(0.25,0.75), typ="n") polygon(c(new.age, rev(new.age)), c(est.cdf.glm.logit2[,2], rev(est.cdf.glm.logit2[,3])), col =colors()[560] , border = NA) polygon(c(new.age, rev(new.age)), c(1-est.cdf.orm.logit2$ub[,1], rev(1-est.cdf.orm.logit2$lb[,1])), col = 'grey', border = NA) lines(new.age, 1-est.cdf.orm.logit2$est[,1], cex=0.01) lines(new.age, 1-est.cdf.orm.logit2$ub[,1], cex=0.01, lty=2) lines(new.age, 1-est.cdf.orm.logit2$lb[,1], cex=0.01, lty=2) lines(new.age, est.cdf.glm.logit2[,1], cex=0.01, col=2) lines(new.age, est.cdf.glm.logit2[,2], cex=0.01, col=2, lty=2) lines(new.age, est.cdf.glm.logit2[,3], cex=0.01, col=2, lty=2) legend("top", c("orm (logit)", "logistic regression"), col=c(1,2), lty=c(1,1),bty="n") plot(new.age,1-est.cdf.orm.logit$est[,1], cex=0.01, xlab="age", ylab="Probability", ylim=c(0,0.41), typ="n") polygon(c(new.age, rev(new.age)), c(est.cdf.glm.logit[,2], rev(est.cdf.glm.logit[,3])), col = colors()[560], border = NA) polygon(c(new.age, rev(new.age)), c(1-est.cdf.orm.logit$ub[,1], rev(1-est.cdf.orm.logit$lb[,1])), col = 'grey', border = NA) lines(new.age, 1-est.cdf.orm.logit$est[,1], cex=0.01) lines(new.age, 1-est.cdf.orm.logit$ub[,1], cex=0.01, lty=2) lines(new.age, 1-est.cdf.orm.logit$lb[,1], cex=0.01, lty=2) lines(new.age, est.cdf.glm.logit[,1], cex=0.01, col=2) lines(new.age, est.cdf.glm.logit[,2], cex=0.01, col=2, lty=2) lines(new.age, est.cdf.glm.logit[,3], cex=0.01, col=2, lty=2) legend("top", c("orm (logit)", "logistic regression"), col=c(1,2), lty=c(1,1),bty="n") mtext("Mean", side=3, at=1/8, outer=TRUE, cex=1) mtext("Median", side=3, at=3/8, outer=TRUE, cex=1) mtext("P[CD4>350|Z]", side=3, at=5/8, outer=TRUE, cex=1) mtext("P[CD4>500|Z]", side=3, at=7/8, outer=TRUE, cex=1) ####### by treatment class new.data2 <- new.data[1:4,] new.data2[,7:10] <- matrix(rcspline.eval(35, knots=orm.logit$Design$parms$age, inclx=TRUE),nrow=4, ncol=4, byrow=TRUE) new.data2[1, 11:13] <- orm.logit$x[which(d$init.class=="Boosted PI")[1], 11:13] new.data2[2, 11:13] <- orm.logit$x[which(d$init.class=="NNRTI")[1], 11:13] new.data2[3, 11:13] <- orm.logit$x[which(d$init.class=="Unboosted PI")[1], 11:13] new.data2[4, 11:13] <- orm.logit$x[which(d$init.class=="Other")[1], 11:13] est.mean.orm.logit <- mean.orm(orm.logit, new.data2,se=TRUE ) new.data.ols <- data.frame(site="brazil", male=1, age=35, init.class=c("Boosted PI","NNRTI","Unboosted PI","Other"), route="homo/bisexual", nadir.cd4=167, init.year=2010, pre.rna=91728, preARTAIDS.clinical="not AIDS") est.lm.boxcox.t <- predict(lm.boxcox, new.data.ols, se.fit=TRUE, conf.int=0.95) est.mean.boxcox <- cbind(box.cox.trans.inverse(est.lm.boxcox.t$linear.predictors, lambda), box.cox.trans.inverse(est.lm.boxcox.t$lower, lambda), box.cox.trans.inverse(est.lm.boxcox.t$upper, lambda)) X <- cbind(1, new.data2) pred <-drop(X%*%rq.mod$coef) V <- summary(rq.mod, cov = TRUE) df <- V$rdf tfrac <- qt((1 - 0.95)/2, df) sdpred <- sqrt(diag(X %*% V$cov %*% t(X))) est.median.rq <- cbind(pred, pred + tfrac * sdpred %o% c(1, -1)) colnames(est.median.rq) <- c("fit", "lower", "higher") est.median.orm.logit <- quantile.orm(mod=orm.logit,new.data=new.data2, probs=0.5,se=TRUE ) test <- predict(lrm.500, new.data.ols, se.fit=TRUE) est.cdf.glm.logit <- cbind(plogis(test$linear.predictors), plogis(with(test, linear.predictors + 1.96*cbind(-se.fit,se.fit)))) test <- predict(lrm.350, new.data.ols, se.fit=TRUE) est.cdf.glm.logit2 <- cbind(plogis(test$linear.predictors), plogis(with(test, linear.predictors + 1.96*cbind(-se.fit,se.fit)))) est.cdf.orm.logit <- cdf.orm(mod=orm.logit, new.data=new.data2, at.y=500) est.cdf.orm.logit2 <- cdf.orm(mod=orm.logit, new.data=new.data2, at.y=350) plot(1:4,est.mean.orm.logit[,1], type="n", axes=FALSE, ylab="CD4", xlab="", xlim=c(0.5, 4.5), ylim=c(300, 450), main="") #abline(v=1) axis(2, at=c(300,350,400, 450), labels=c("300", "350","400", "450")) segments(1:4,est.mean.orm.logit[,3], 1:4, est.mean.orm.logit[,4]) points(1:4,est.mean.orm.logit[,1], pch="-", cex=1) segments((1:4)+0.2,est.mean.boxcox[,2], (1:4)+0.2, est.mean.boxcox[,3], col=2) points((1:4)+0.2,est.mean.boxcox[,1], pch="-", cex=1, col=2) #segments(hernan.700.sw[,2], cd4+3, hernan.700.sw[,3], cd4+3, col=2) #points(hernan.700.sw[,1], cd4+3, pch="|", cex=0.4, col=2) legend("top", c("orm (logit)", "Box-Cox"), col=c(1,2), lty=c(1,1),bty="n") axis(1, 1:4, labels=c("BPI","NNRTI","UBPI","Other"), las=1, tck=-0.01, col=1, cex.axis=0.8) box() plot(1:4,est.median.orm.logit$quantile, type="n", axes=FALSE, ylab="CD4", xlab="", xlim=c(0.5, 4.5), ylim=c(300, 450), main="") #abline(v=1) axis(2, at=c(300,350,400, 450), labels=c("300", "350","400", "450")) segments(1:4,est.median.orm.logit$lb, 1:4, est.median.orm.logit$ub) points(1:4,est.median.orm.logit$quantile, pch="-", cex=1) segments((1:4)+0.2,est.median.rq[,2], (1:4)+0.2, est.median.rq[,3], col=2) points((1:4)+0.2,est.median.rq[,1], pch="-", cex=1, col=2) #segments(hernan.700.sw[,2], cd4+3, hernan.700.sw[,3], cd4+3, col=2) #points(hernan.700.sw[,1], cd4+3, pch="|", cex=0.4, col=2) legend("top", c("orm (logit)", "Median regression"), col=c(1,2), lty=c(1,1),bty="n") axis(1, 1:4, labels=c("BPI","NNRTI","UBPI","Other"), las=1, tck=-0.01, col=1, cex.axis=0.8) box() plot(1:4,1-est.cdf.orm.logit2$est, type="n", axes=FALSE, ylab="probability", xlab="", xlim=c(0.5, 4.5), ylim=c(0.3, 0.8), main="") axis(2, at=c(0.4, 0.5, 0.6,0.7), labels=c("0.4", "0.5","0.6","0.7")) segments(1:4,1-est.cdf.orm.logit2$lb, 1:4, 1-est.cdf.orm.logit2$ub) points(1:4,1-est.cdf.orm.logit2$est, pch="-", cex=1) segments((1:4)+0.2,est.cdf.glm.logit2[,2], (1:4)+0.2, est.cdf.glm.logit2[,3], col=2) points((1:4)+0.2,est.cdf.glm.logit2[,1], pch="-", cex=1, col=2) legend("top", c("orm (logit)", "Logistic regression"), col=c(1,2), lty=c(1,1),bty="n") axis(1, 1:4, labels=c("BPI","NNRTI","UBPI","Other"), las=1, tck=-0.01, col=1, cex.axis=0.8) box() plot(1:4,1-est.cdf.orm.logit$est, type="n", axes=FALSE, ylab="probability", xlab="", xlim=c(0.5, 4.5), ylim=c(0.05, 0.4), main="") axis(2, at=c( 0.1, 0.2, 0.3, 0.4), labels=c("0.1", "0.2","0.3", "0.4")) segments(1:4,1-est.cdf.orm.logit$lb, 1:4, 1-est.cdf.orm.logit$ub) points(1:4,1-est.cdf.orm.logit$est, pch="-", cex=1) segments((1:4)+0.2,est.cdf.glm.logit[,2], (1:4)+0.2, est.cdf.glm.logit[,3], col=2) points((1:4)+0.2,est.cdf.glm.logit[,1], pch="-", cex=1, col=2) legend("top", c("orm (logit)", "Logistic regression"), col=c(1,2), lty=c(1,1),bty="n") axis(1, 1:4, labels=c("BPI","NNRTI","UBPI","Other"), las=1, tck=-0.01, col=1, cex.axis=0.8) box() dev.off() ##### viral load as outcome ########################################################### ########## orm with different link functions ############## ########################################################### orm.logit <- orm(post.rna.d~ site + male + rcs(age,4) + init.class + route + rcs(sqrt(nadir.cd4),4) +rcs(init.year,4)+ rcs(log(pre.rna),4)+ preARTAIDS.clinical, data=d, x=TRUE, y=TRUE) orm.probit <- orm(post.rna.d~ site + male + rcs(age,4) + init.class + route + rcs(sqrt(nadir.cd4),4) +rcs(init.year,4)+ rcs(log(pre.rna),4)+ preARTAIDS.clinical, data=d, x=TRUE, y=TRUE, family=probit) orm.cloglog <- orm(post.rna.d~ site + male + rcs(age,4) + init.class + route + rcs(sqrt(nadir.cd4),4) +rcs(init.year,4)+ rcs(log(pre.rna),4)+ preARTAIDS.clinical, data=d, x=TRUE, y=TRUE, family=cloglog) orm.loglog <- orm(post.rna.d~ site + male + rcs(age,4) + init.class + route + rcs(sqrt(nadir.cd4),4) +rcs(init.year,4)+ rcs(log(pre.rna),4)+ preARTAIDS.clinical, data=d, x=TRUE, y=TRUE, family=loglog,eps=1e-3) log.L <- c(-orm.probit$deviance[2], -orm.logit$deviance[2], -orm.loglog$deviance[2], -orm.cloglog$deviance[2])/2 ### cloglog in orm correspoonds to loglog in conventional cumulative link models names(log.L) <- c("probit", "logit", "loglog", "cloglog") log.L # probit logit loglog cloglog # -5213.691 -5185.259 -5245.030 -5162.703 presid.logit <- presid.orm(orm.logit) presid.probit <- presid.orm(orm.probit) presid.cloglog <- presid.orm(orm.cloglog) presid.loglog <- presid.orm(orm.loglog) ############################################################### ###### Figure S.8 estimated transformation and PSR QQ plot ############## ############################################################### postscript("fig19-app_4.eps", height=6, width=8, horizontal = FALSE, onefile = FALSE, paper = "special") par(mfrow=c(3,4)) par(mar=c(2.5,2.7,0.5,0.2), mgp = c(1.5, 0.5, 0), oma=c(0.5,3,2,0.5)) alpha.probit <- -orm.probit$coefficients[1:(length(orm.probit$yunique)-1)] plot(c(0, log(orm.probit$yunique[-1],10)), c(alpha.probit, Inf), type="s", ylab=expression(paste(hat(alpha))), xlab="log(viral load)", col=1, axes=FALSE) abline(v=log(orm.probit$yunique[-1],10)[1], lty=2) axis(side=2) axis(side=1, at=c(3,4,5), labels=c("3", "4", "5")) box() mtext("undetectable", side=1, at=1, cex=0.6) alpha.logit <- -orm.logit$coefficients[1:(length(orm.logit$yunique)-1)] plot(c(0, log(orm.logit$yunique[-1],10)), c(alpha.logit, Inf), type="s", ylab=expression(paste(hat(alpha))), xlab="log(viral load)", col=1, axes=FALSE) abline(v=log(orm.logit$yunique[-1],10)[1], lty=2) axis(side=2) axis(side=1, at=c(3, 4,5), labels=c("3", "4", "5")) box() mtext("undetectable", side=1, at=1, cex=0.6) alpha.loglog <- -orm.loglog$coefficients[1:(length(orm.loglog$yunique)-1)] plot(c(0, log(orm.loglog$yunique[-1],10)), c(alpha.loglog, Inf), type="s", ylab=expression(paste(hat(alpha))), xlab="log(viral load)", col=1, axes=FALSE) abline(v=log(orm.loglog$yunique[-1],10)[1], lty=2) axis(side=2) axis(side=1, at=c(3, 4,5), labels=c("3", "4", "5")) box() mtext("undetectable", side=1, at=1, cex=0.6) alpha.cloglog <- -orm.cloglog$coefficients[1:(length(orm.cloglog$yunique)-1)] plot(c(0, log(orm.cloglog$yunique[-1],10)), c(alpha.cloglog, Inf), type="s", ylab=expression(paste(hat(alpha))), xlab="log(viral load)", col=1, axes=FALSE) abline(v=log(orm.cloglog$yunique[-1],10)[1], lty=2) axis(side=2) axis(side=1, at=c(3, 4,5), labels=c("3", "4", "5")) box() mtext("undetectable", side=1, at=1, cex=0.6) order.presid.probit <- presid.probit[order(presid.probit)] order.presid.logit <- presid.logit[order(presid.logit)] order.presid.loglog <- presid.loglog[order(presid.loglog)] order.presid.cloglog <- presid.cloglog[order(presid.cloglog)] plot( qunif( (1:length(presid.probit))/length(presid.probit), -1, 1), order.presid.probit, cex=0.1, xlab="Quantiles of Uniform Distribution", ylab="Quantiles of PSR", main="", col=ifelse( (d$post.rna.d==0)[order(presid.probit)], 4,1), ylim=c(-1,1)) abline(a=0, b=1, col=2) legend("top", c("undetectable", "detectable"), col=c(4,1), lty=1, bty="n") plot(qunif( (1:length(presid.logit))/length(presid.logit), -1, 1), order.presid.logit, cex=0.1, xlab="Quantiles of Uniform Distribution", ylab="Quantiles of PSR", main="", col=ifelse( (d$post.rna.d==0)[order(presid.logit)], 4,1), ylim=c(-1, 1)) abline(a=0, b=1, col=2) legend("top", c("undetectable", "detectable"), col=c(4,1), lty=1, bty="n") plot( qunif( (1:length(presid.loglog))/length(presid.loglog), -1, 1), order.presid.loglog,cex=0.1, xlab="Quantiles of Uniform Distribution", ylab="Quantiles of PSR", main="", col=ifelse( (d$post.rna.d==0)[order(presid.loglog)], 4,1), ylim=c(-1,1)) abline(a=0, b=1, col=2) legend("top", c("undetectable", "detectable"), col=c(4,1), lty=1, bty="n") plot( qunif( (1:length(presid.cloglog))/length(presid.cloglog), -1, 1), order.presid.cloglog,cex=0.1, xlab="Quantiles of Uniform Distribution", ylab="Quantiles of PSR", main="", col=ifelse( (d$post.rna.d==0)[order(presid.cloglog)], 4,1), ylim=c(-1,1)) abline(a=0, b=1, col=2) legend("top", c("undetectable", "detectable"), col=c(4,1), lty=1, bty="n") mtext( "(a)", side=2, outer=TRUE, at=1, cex=1, las=1) mtext( "(b)", side=2, outer=TRUE, at=2/3, cex=1, las=1) mtext( "(c)", side=2, outer=TRUE, at=1/3, cex=1, las=1) mtext("probit", side=3, at=1/8, outer=TRUE, cex=1) mtext("logit", side=3, at=3/8, outer=TRUE, cex=1) mtext("cloglog", side=3, at=5/8, outer=TRUE, cex=1) mtext("loglog", side=3, at=7/8, outer=TRUE, cex=1) orm.omer.logit <- -orm.logit$coefficients[sapply(d$post.rna.d, FUN=function(x) which(orm.logit$yunique==x))] - (predict(orm.logit, type="lp", kint=1) - orm.logit$coefficients[1]) orm.omer.logit <- orm.omer.logit[-which(d$post.rna.d==max(d$post.rna.d))] orm.omer.probit <- -orm.probit$coefficients[sapply(d$post.rna.d, FUN=function(x) which(orm.probit$yunique==x))] - (predict(orm.probit, type="lp", kint=1) - orm.probit$coefficients[1]) orm.omer.probit <- orm.omer.probit[-which(d$post.rna.d==max(d$post.rna.d))] orm.omer.cloglog <- -orm.cloglog$coefficients[sapply(d$post.rna.d, FUN=function(x) which(orm.cloglog$yunique==x))] - (predict(orm.cloglog, type="lp", kint=1) - orm.cloglog$coefficients[1]) orm.omer.cloglog <- orm.omer.cloglog[-which(d$post.rna.d==max(d$post.rna.d))] orm.omer.loglog <- -orm.loglog$coefficients[sapply(d$post.rna.d, FUN=function(x) which(orm.loglog$yunique==x))] - (predict(orm.loglog, type="lp", kint=1) - orm.loglog$coefficients[1]) orm.omer.loglog <- orm.omer.loglog[-which(d$post.rna.d==max(d$post.rna.d))] order.orm.omer.probit <- orm.omer.probit[order(orm.omer.probit)] order.orm.omer.logit <- orm.omer.logit[order(orm.omer.logit)] order.orm.omer.loglog <- orm.omer.loglog[order(orm.omer.loglog)] order.orm.omer.cloglog <- orm.omer.cloglog[order(orm.omer.cloglog)] plot( qnorm( (1:length(orm.omer.probit))/length(orm.omer.probit)), order.orm.omer.probit, cex=0.25, xlab="Quantiles of Normal Distribution", ylab="Quantiles of OMER", main="", col=ifelse( (d$post.rna.d[-which(d$post.rna.d==max(d$post.rna.d)) ]==0)[order(orm.omer.probit)], 4,1)) abline(a=0, b=1, col=2) legend("top", c("undetectable", "detectable"), col=c(4,1), lty=1, bty="n") plot(qlogis( (1:length(orm.omer.logit))/length(orm.omer.logit)), order.orm.omer.logit, cex=0.25, xlab="Quantiles of Logistic Distribution", ylab="Quantiles of OMER", main="", col=ifelse( (d$post.rna.d[-which(d$post.rna.d==max(d$post.rna.d)) ]==0)[order(orm.omer.logit)], 4,1)) abline(a=0, b=1, col=2) legend("top", c("undetectable", "detectable"), col=c(4,1), lty=1, bty="n") qGumbelMax <- function(p, mu=0, sigma=1){ x <- mu + -sigma*(log(-log(p))) } qGumbelMin <- function(p, mu=0, sigma=1){ x <- mu + sigma*(log(-log(1-p))) } plot( qGumbelMin((1:length(orm.omer.logit))/length(orm.omer.logit), 0, 1), order.orm.omer.loglog,cex=0.25, xlab="Quantiles of Extreme Value (I) ", ylab="Quantiles of OMER", main="", col=ifelse( (d$post.rna.d[-which(d$post.rna.d==max(d$post.rna.d)) ]==0)[order(orm.omer.loglog)], 4,1)) #qqline(order.orm.omer.cloglog, distribution=function(x) qGumbelMax(x,), col=3) abline(a=0, b=1, col=2) legend("top", c("undetectable", "detectable"), col=c(4,1), lty=1, bty="n") plot( qGumbelMax((1:length(orm.omer.logit))/length(orm.omer.logit), 0, 1), order.orm.omer.cloglog,cex=0.25, xlab="Quantiles of Extreme Value (II) ", ylab="Quantiles of OMER", main="", col=ifelse( (d$post.rna.d[-which(d$post.rna.d==max(d$post.rna.d)) ]==0)[order(orm.omer.cloglog)], 4,1)) abline(a=0, b=1, col=2) legend("top", c("undetectable", "detectable"), col=c(4,1), lty=1, bty="n") dev.off() ################################################### ####### Figure S.9: PSR by predictor plot ######### ################################################## ######### model not include age ############## orm.logit.2 <- orm(post.rna.d~ site + male + init.class + route + rcs(log(pre.rna),4) + rcs(sqrt(nadir.cd4),4) +rcs(init.year,4)+ preARTAIDS.clinical, data=d, x=TRUE, y=TRUE) presid.logit.2 <- presid.orm(orm.logit.2) orm.cloglog.2 <- orm(post.rna.d~ site + male + init.class + route + rcs(log(pre.rna),4) + rcs(sqrt(nadir.cd4),4) +rcs(init.year,4)+ preARTAIDS.clinical, data=d,family=cloglog, x=TRUE, y=TRUE) presid.cloglog.2 <- presid.orm(orm.logit.2) postscript("fig20-app_5.eps", height=6, width=6, horizontal = FALSE, onefile = FALSE, paper = "special") par(mfcol=c(2,2)) par(mar=c(2.5,2.5,0.5,0.5), mgp = c(1.5, 0.5, 0), oma=c(0.5,0.5,2,0.5)) plot(d$age, presid.logit.2, cex=0.1, type="n", ylim=c(-1, 1), xlab="age", ylab="PSR (logit) ", main="") points(d$age, presid.logit.2,cex=.5,col=gray(.6), pch=ifelse(d$post.rna.detect==0, 1,3)) lines(supsmu(d$age, presid.logit.2, bass=1),col=1,lwd=2, cex=1) #lines(lowess(presid.logit.2~log(d$pre.rna,10)),lwd=2) abline(h=0,lty=2) legend("bottomleft", c("undetectable", "detectable"), cex=0.8,pch=c(1,3), bty="n", col=gray(0.6)) plot(d$age, presid.cloglog.2, cex=0.1, type="n", ylim=c(-1, 1), xlab="age", ylab="PSR (loglog)", main="") points(d$age, presid.cloglog.2,cex=.5,col=gray(.6), pch=ifelse(d$post.rna.detect==0, 1,3)) lines(supsmu(d$age, presid.cloglog.2, bass=1),col=1,lwd=2, cex=1) #lines(lowess(presid.logit.2~log(d$pre.rna,10)),lwd=2) abline(h=0,lty=2) legend("bottomleft", c("undetectable", "detectable"), cex=0.8,pch=c(1,3), bty="n", col=gray(0.6)) plot(d$age, presid.logit, cex=0.1, type="n", ylim=c(-1, 1), xlab="age", ylab="PSR (logit)", main="") points(d$age, presid.logit,cex=0.5,col=gray(.6), pch=ifelse(d$post.rna.detect==0, 1,3) ) lines(supsmu(d$age, presid.logit, bass=1),col=1,lwd=2, cex=1) #lines(lowess(presid.logit~log(d$pre.rna,10)),lwd=2) abline(h=0,lty=2) legend("bottomleft", c("undetectable", "detectable"), cex=0.8,pch=c(1,3), bty="n", col=gray(0.6)) plot(d$age, presid.cloglog, cex=0.1, type="n", ylim=c(-1, 1), xlab="age", ylab="PSR (loglog) ", main="") points(d$age, presid.cloglog,cex=0.5,col=gray(.6), pch=ifelse(d$post.rna.detect==0, 1,3) ) lines(supsmu(d$age, presid.cloglog, bass=1),col=1,lwd=2, cex=1) #lines(lowess(presid.logit~log(d$pre.rna,10)),lwd=2) abline(h=0,lty=2) legend("bottomleft", c("undetectable", "detectable"), cex=0.8,pch=c(1,3), bty="n", col=gray(0.6)) mtext("not include age", side=3, at=1/4, outer=TRUE, cex=1) mtext("include age", side=3, at=3/4, outer=TRUE, cex=1) dev.off() ############################################################################ ####### Figure 15: performance of orm on estimation conditional quantiles, CDFs ######### ############################################################################# postscript("fig16-app_6.eps", height=4, width=6, horizontal = FALSE, onefile = FALSE, paper = "special") par(mfrow=c(2,3)) par(mar=c(2.5,2.5,0.5,0.5), mgp = c(1.5, 0.5, 0), oma=c(0.5,3,2,0.5)) new.age <- (180:820)/10 new.data <- matrix(NA, ncol=dim(orm.logit$x)[2], nrow=length(new.age)) colnames(new.data) <- colnames(orm.logit$x) for(i in 1:5) new.data[,i] <- rep(orm.logit$x[which(d$site=="brazil")[1], i], length(new.age)) new.data[, "male"] <- 1 new.data[, 7:9] <- rcspline.eval(new.age, knots=orm.logit$Design$parms$age, inclx=TRUE) for(i in 10:12) new.data[,i] <- rep(orm.logit$x[which(d$init.class=="NNRTI")[1], i], length(new.age)) for(i in 13:16) new.data[,i] <- rep(orm.logit$x[which(d$route=="homo/bisexual")[1], i], length(new.age)) for(i in 17:19) new.data[,i] <- rep(orm.logit$x[which(d$nadir.cd4==167)[1], i], length(new.age)) for(i in 20:22) new.data[,i] <- rep(orm.logit$x[which(d$init.year==2010)[1], i], length(new.age)) new.data[,23:25] <- matrix(rcspline.eval(log(91728), knots=orm.logit$Design$parms$pre.rna, inclx=TRUE),nrow=length(new.age), ncol=3, byrow=TRUE) for(i in 26:27) new.data[,i] <- rep(orm.logit$x[which(d$preARTAIDS.clinical=="not AIDS")[1], i], length(new.age)) new.data.ols <- data.frame(site="brazil", male=1, age=new.age, init.class="NNRTI", route="homo/bisexual", nadir.cd4=167, init.year=2010, pre.rna=91728, preARTAIDS.clinical="not AIDS") est.cdf.orm.logit <- cdf.orm(mod=orm.logit, new.data=new.data, at.y=0) est.cdf.orm.cloglog <- cdf.orm(mod=orm.cloglog, new.data=new.data, at.y=0) binary.mod <- lrm(post.rna.detect~ site + male + rcs(age,4) + init.class + route + rcs(sqrt(nadir.cd4),4) +rcs(init.year,4)+ rcs(log(pre.rna),4)+ preARTAIDS.clinical, data=d, x=TRUE, y=TRUE) test <- predict(binary.mod, new.data.ols, se.fit=TRUE) est.cdf.binary <- cbind(plogis(test$linear.predictors), plogis(with(test, linear.predictors + 1.96*cbind(-se.fit,se.fit)))) plot(new.age,1-est.cdf.orm.logit$est[,1], cex=0.01, xlab="age", ylab="Probability", ylim=c(0,0.15), typ="n") polygon(c(new.age, rev(new.age)), c(est.cdf.binary[,2], rev(est.cdf.binary[,3])), col = colors()[560], border = NA) polygon(c(new.age, rev(new.age)), c(1-est.cdf.orm.logit$ub[,1], rev(1-est.cdf.orm.logit$lb[,1])), col = colors()[618], border = NA) polygon(c(new.age, rev(new.age)), c(1-est.cdf.orm.cloglog$ub[,1], rev(1-est.cdf.orm.cloglog$lb[,1])), col = 'grey', border = NA) #polygon(c(new.age, rev(new.age)), c(est.cdf.boxcox[,2], rev(est.cdf.boxcox[,3])), col = "grey", border = NA) lines(new.age, 1-est.cdf.orm.cloglog$est[,1], cex=0.01, col=1) lines(new.age, 1-est.cdf.orm.cloglog$ub[,1], cex=0.01, lty=2, col=1) lines(new.age, 1-est.cdf.orm.cloglog$lb[,1], cex=0.01, lty=2, col=1) lines(new.age, 1-est.cdf.orm.logit$est[,1], cex=0.01, col=4) lines(new.age, 1-est.cdf.orm.logit$ub[,1], cex=0.01, lty=2, col=4) lines(new.age, 1-est.cdf.orm.logit$lb[,1], cex=0.01, lty=2, col=4) lines(new.age, est.cdf.binary[,1], cex=0.01, col=2) lines(new.age, est.cdf.binary[,2], cex=0.01, col=2, lty=2) lines(new.age, est.cdf.binary[,3], cex=0.01, col=2, lty=2) legend("top", c("orm (loglog)","orm (logit)", "logistic regression"), col=c(1,4,2), lty=c(1,1),bty="n") est.cdf.orm.logit.g1000<- cdf.orm(mod=orm.logit, new.data=new.data, at.y=1000) est.cdf.orm.cloglog.g1000 <- cdf.orm(mod=orm.cloglog, new.data=new.data, at.y=1000) post.rna.g1000 <- ifelse(d$post.rna.d>1000, 1, 0) binary.mod.g1000 <- lrm(post.rna.g1000~ site + male + rcs(age,4) + init.class + route + rcs(sqrt(nadir.cd4),4) +rcs(init.year,4)+ rcs(log(pre.rna),4)+ preARTAIDS.clinical, data=d, x=TRUE, y=TRUE) test <- predict(binary.mod.g1000, new.data.ols, se.fit=TRUE) est.cdf.binary.g1000 <- cbind(plogis(test$linear.predictors), plogis(with(test, linear.predictors + 1.96*cbind(-se.fit,se.fit)))) plot(new.age,1-est.cdf.orm.logit.g1000$est[,1], cex=0.01, xlab="age", ylab="Probability", ylim=c(0,0.15), typ="n") polygon(c(new.age, rev(new.age)), c(est.cdf.binary.g1000[,2], rev(est.cdf.binary.g1000[,3])), col = colors()[560], border = NA) polygon(c(new.age, rev(new.age)), c(1-est.cdf.orm.logit.g1000$ub[,1], rev(1-est.cdf.orm.logit.g1000$lb[,1])), col = colors()[618], border = NA) polygon(c(new.age, rev(new.age)), c(1-est.cdf.orm.cloglog.g1000$ub[,1], rev(1-est.cdf.orm.cloglog.g1000$lb[,1])), col = 'grey', border = NA) #polygon(c(new.age, rev(new.age)), c(est.cdf.boxcox[,2], rev(est.cdf.boxcox[,3])), col = "grey", border = NA) lines(new.age, 1-est.cdf.orm.cloglog.g1000$est[,1], cex=0.01, col=1) lines(new.age, 1-est.cdf.orm.cloglog.g1000$ub[,1], cex=0.01, lty=2, col=1) lines(new.age, 1-est.cdf.orm.cloglog.g1000$lb[,1], cex=0.01, lty=2, col=1) lines(new.age, 1-est.cdf.orm.logit.g1000$est[,1], cex=0.01, col=4) lines(new.age, 1-est.cdf.orm.logit.g1000$ub[,1], cex=0.01, lty=2, col=4) lines(new.age, 1-est.cdf.orm.logit.g1000$lb[,1], cex=0.01, lty=2, col=4) lines(new.age, est.cdf.binary.g1000[,1], cex=0.01, col=2) lines(new.age, est.cdf.binary.g1000[,2], cex=0.01, col=2, lty=2) lines(new.age, est.cdf.binary.g1000[,3], cex=0.01, col=2, lty=2) legend("top", c("orm (loglog)","orm (logit)", "logistic regression"), col=c(1,4,2), lty=c(1,1),bty="n") est.quantile.orm.logit <- quantile.orm(mod=orm.logit,new.data=new.data, probs=0.95,se=TRUE ) est.quantile.orm.cloglog <- quantile.orm(mod=orm.cloglog,new.data=new.data, probs=0.95,se=TRUE ) plot(new.age, est.quantile.orm.logit$quantile,type="n", cex=0.1, col=4, ylim=c(300, 6000), axes=FALSE, log="y", xlab="age", ylab="viral load") axis(2, at=c(400, 800, 1500, 3000, 6000), labels=c("DL", "800","1500", "3000", "6000")) axis(1, at=seq(20, 80, by=10), labels=c("20", "30","40","50", "60", "70","80"), las=1, col=1, cex.axis=0.8) polygon(c(new.age, rev(new.age)), c(est.quantile.orm.logit$ub, rev(est.quantile.orm.logit$lb[,1])), col = colors()[618], border = NA) polygon(c(new.age, rev(new.age)), c(est.quantile.orm.cloglog$ub, rev(est.quantile.orm.cloglog$lb[,1])), col ='grey' , border = NA) lines(new.age, est.quantile.orm.logit$quantile, cex=0.01, col=4) lines(new.age, est.quantile.orm.logit$ub, cex=0.01, col=4, lty=2) lines(new.age, est.quantile.orm.logit$lb, cex=0.01, col=4, lty=2) lines(new.age, est.quantile.orm.cloglog$quantile, cex=0.01) lines(new.age, est.quantile.orm.cloglog$lb cex=0.01, lty=2) lines(new.age, est.quantile.orm.cloglog$ub, cex=0.01, lty=2) abline(h=400, lty=2, col=1) legend("top", c("orm (loglog)","orm (logit)"), col=c(1,4), lty=c(1,1), bty="n") box() library(quantreg) # rq.mod <- rq(post.rna.d~ site + male + rcs(age,4) + init.class + route + rcs(sqrt(nadir.cd4),4) +rcs(init.year,4)+ rcs(log(pre.rna),4)+ preARTAIDS.clinical, data=d, tau=0.95) rq.mod.2 <- rq(tmp~ site + male + rcs(age,4) + init.class + route + rcs(sqrt(nadir.cd4),4) +rcs(init.year,4)+ rcs(log(pre.rna),4)+ preARTAIDS.clinical, data=d, tau=0.95) X <- cbind(1, new.data) pred <-drop(X%*%rq.mod$coef) V <- summary(rq.mod, cov = TRUE) df <- V$rdf tfrac <- qt((1 - 0.95)/2, df) sdpred <- sqrt(diag(X %*% V$cov %*% t(X))) est.quantile.rq <- cbind(pred, pred + tfrac * sdpred %o% c(1, -1)) colnames(est.quantile.rq) <- c("fit", "lower", "higher") X <- cbind(1, new.data) pred <-drop(X%*%rq.mod.2$coef) V <- summary(rq.mod.2, cov = TRUE) df <- V$rdf tfrac <- qt((1 - 0.95)/2, df) sdpred <- sqrt(diag(X %*% V$cov %*% t(X))) est.quantile.rq.2 <- cbind(pred, pred + tfrac * sdpred %o% c(1, -1)) colnames(est.quantile.rq.2) <- c("fit", "lower", "higher") new.data2 <- new.data[1:4,] new.data2[,7:9] <- matrix(rcspline.eval(35, knots=orm.logit$Design$parms$age, inclx=TRUE),nrow=4, ncol=3, byrow=TRUE) new.data2[1, 10:12] <- orm.logit$x[which(d$init.class=="Boosted PI")[1], 10:12] new.data2[2, 10:12] <- orm.logit$x[which(d$init.class=="NNRTI")[1], 10:12] new.data2[3, 10:12] <- orm.logit$x[which(d$init.class=="Unboosted PI")[1], 10:12] new.data2[4, 10:12] <- orm.logit$x[which(d$init.class=="Other")[1], 10:12] #est.mean.orm.logit <- mean.orm(orm.logit, new.data2,se=TRUE ) new.data2.ols <- data.frame(site="brazil", male=1, age=35, init.class=c("Boosted PI","NNRTI","Unboosted PI","Other"), route="homo/bisexual", nadir.cd4=167, init.year=2010, pre.rna=91728, preARTAIDS.clinical="not AIDS") est.cdf.orm.logit <- cdf.orm(mod=orm.logit, new.data=new.data2, at.y=0) est.cdf.orm.cloglog <- cdf.orm(mod=orm.cloglog, new.data=new.data2, at.y=0) test <- predict(binary.mod, new.data2.ols, se.fit=TRUE) est.cdf.binary <- cbind(plogis(test$linear.predictors), plogis(with(test, linear.predictors + 1.96*cbind(-se.fit,se.fit)))) plot(1:4,1-est.cdf.orm.cloglog$est, type="n", axes=FALSE, ylab="Probability", xlab="", xlim=c(0.5, 4.5), ylim=c(0.02, 0.22), main="") #abline(v=1) axis(2, at=c(0, 0.05, 0.1, 0.15, 0.2), labels=c("0", "0.05", "0.10", "0.15", "0.20")) segments(1:4,1-est.cdf.orm.cloglog$lb, 1:4, 1-est.cdf.orm.cloglog$ub) points(1:4,1-est.cdf.orm.cloglog$est, pch="-", cex=1) segments((1:4)+0.15,1-est.cdf.orm.logit$lb, (1:4)+0.15, 1-est.cdf.orm.logit$ub, col=4) points((1:4)+0.15,1-est.cdf.orm.logit$est, pch="-", cex=1, col=4) segments((1:4)+0.3, est.cdf.binary[,2], (1:4)+0.3, est.cdf.binary[,3], col=2) points((1:4)+0.3, est.cdf.binary[,1], pch="-", cex=1, col=2) legend("top", c("orm (loglog)","orm (logit)", "logistic regression"), col=c(1,4,2), lty=c(1,1),bty="n") axis(1, 1:4, labels=c("BPI","NNRTI","UBPI","Other"), las=1, tck=-0.01, col=1, cex.axis=0.8) box() est.cdf.orm.logit.g1000<- cdf.orm(mod=orm.logit, new.data=new.data2, at.y=1000) est.cdf.orm.cloglog.g1000 <- cdf.orm(mod=orm.cloglog, new.data=new.data2, at.y=1000) post.rna.g1000 <- ifelse(d$post.rna.d>1000, 1, 0) binary.mod.g1000 <- lrm(post.rna.g1000~ site + male + rcs(age,4) + init.class + route + rcs(sqrt(nadir.cd4),4) +rcs(init.year,4)+ rcs(log(pre.rna),4)+ preARTAIDS.clinical, data=d, x=TRUE, y=TRUE) test <- predict(binary.mod.g1000, new.data2.ols, se.fit=TRUE) est.cdf.binary.g1000 <- cbind(plogis(test$linear.predictors), plogis(with(test, linear.predictors + 1.96*cbind(-se.fit,se.fit)))) plot(1:4,1-est.cdf.orm.cloglog.g1000$est, type="n", axes=FALSE, ylab="Probability", xlab="", xlim=c(0.5, 4.5), ylim=c(0.02, 0.22), main="") #abline(v=1) axis(2, at=c(0, 0.05, 0.1, 0.15, 0.2), labels=c("0", "0.05", "0.10", "0.15", "0.20")) segments(1:4,1-est.cdf.orm.cloglog.g1000$lb, 1:4, 1-est.cdf.orm.cloglog.g1000$ub) points(1:4,1-est.cdf.orm.cloglog.g1000$est, pch="-", cex=1) segments((1:4)+0.1,1-est.cdf.orm.logit.g1000$lb, (1:4)+0.1, 1-est.cdf.orm.logit.g1000$ub, col=4) points((1:4)+0.1,1-est.cdf.orm.logit.g1000$est, pch="-", cex=1, col=4) segments((1:4)+0.2, est.cdf.binary.g1000[,2], (1:4)+0.2, est.cdf.binary.g1000[,3], col=2) points((1:4)+0.2, est.cdf.binary.g1000[,1], pch="-", cex=1, col=2) legend("top", c("orm (loglog)","orm (logit)", "logistic regression"), col=c(1,4,2), lty=c(1,1),bty="n") axis(1, 1:4, labels=c("BPI","NNRTI","UBPI","Other"), las=1, tck=-0.01, col=1, cex.axis=0.8) box() est.quantile.orm.logit <- quantile.orm(mod=orm.logit,new.data=new.data2, probs=0.95,se=TRUE ) est.quantile.orm.cloglog <- quantile.orm(mod=orm.cloglog,new.data=new.data2, probs=0.95,se=TRUE ) plot(1:4,est.quantile.orm.cloglog$est, type="n", axes=FALSE, ylab="viral load", xlab="", xlim=c(0.5, 4.5), ylim=c(300, 10000), main="", log="y") axis(2, at=c(400, 800,2500, 5000, 10000), labels=c("DL", "800","2500", "5000", "10000")) segments(1:4,est.quantile.orm.cloglog$lb, 1:4, est.quantile.orm.cloglog$ub) points(1:4,est.quantile.orm.cloglog$quantile, pch="-", cex=1) segments((1:4)+0.2, est.quantile.orm.logit$lb, (1:4)+0.2, est.quantile.orm.logit$ub, col=4) points((1:4)+0.2, est.quantile.orm.logit$quantile, pch="-", cex=1, col=4) legend("top", c("orm (loglog)","orm (logit)"), col=c(1,4), lty=c(1,1),bty="n") axis(1, 1:4, labels=c("BPI","NNRTI","UBPI","Other"), las=1, tck=-0.01, col=1, cex.axis=0.8) abline(h=400, col=1, lty=2) box() mtext("P[detectable VL |Z]", side=3, at=1/6, outer=TRUE, cex=1) mtext("P[VL>1000 |Z]", side=3, at=3/6, outer=TRUE, cex=1) mtext("95th percentile", side=3, at=5/6, outer=TRUE, cex=1) dev.off()