library(SparseM)
library(rms)
library(PResiduals)

################################################################################################
######################### FUNCTIONS ###########################################################
################################################################################################

## orm.scores: score pieces for a cumulative-link ordinal model fit with rms::orm().
##   y    -- outcome; coerced to consecutive integer categories 1..ny
##   X    -- covariate matrix (a vector is promoted to a one-column matrix)
##   link -- link family passed to orm(); only link[1] is used
## Returns a list with per-subject scores (dl.dtheta), the negated information
## matrix (d2l.dtheta.dtheta), probability-scale residuals (presid), and
## d(presid)/d(theta) (dpresid.dtheta) -- everything corTS() consumes.
orm.scores <- function (y, X, link=c("logistic", "probit", "cauchit", "loglog", "cloglog")) {
  y <- as.numeric(factor(y))
  if(!is.matrix(X)) X = matrix(X, ncol=1)
  N = length(y)
  ny = length(table(y))        # number of distinct outcome levels
  na = ny - 1                  # number of intercepts (alpha)
  nb = ncol(X)                 # number of slopes (beta)
  npar = na + nb
  Z = outer(y, 1:ny, "<=")     # Z[i, j] = 1{y_i <= j}, as in McCullagh (1980)
  mod <- orm(y~X, family=link[1], x=TRUE, y=TRUE)
  alpha = mod$coeff[1:na]
  beta = mod$coeff[-(1:na)]
  dl.dtheta = matrix(0, N, npar)
  Gamma = matrix(0,N,na)
  dlow.dtheta = dhi.dtheta =dpij.dtheta= matrix(, npar, N)
  for (i in 1:N) {
    z = Z[i,] ## z has length ny
    x = X[i,]
    ## upper-tail scale: gamma = 1 - F(alpha + x'beta); presumably gamma_j =
    ## P(Y > j | x) under orm's parameterization -- TODO confirm
    gamma <-1- mod$trans$cumprob(alpha + sum(beta*x))
    diffgamma = diff(c(gamma,1))
    diffgamma <- ifelse(diffgamma<1e-16, 1e-16, diffgamma)   # floor tiny cell probabilities
    invgamma = 1/gamma
    invgamma <- ifelse(invgamma==Inf, 1e16, invgamma)        # cap infinities for stability
    invgamma2 = invgamma^2
    invdiffgamma = 1/diffgamma
    invdiffgamma2 = invdiffgamma^2
    phi = log(gamma / diffgamma) ## phi has length na
    Gamma[i,] = gamma
    dg.dphi = 1 - 1/(1 + exp(phi))
    ## l is the log likelihood (6.3) in McCullagh (1980)
    dl.dphi = z[-ny] - z[-1] * dg.dphi
    t.dl.dphi = t(dl.dphi)
    ## dphi.dgamma is a na*na matrix with rows indexed by phi
    ## and columns indexed by gamma
    dphi.dgamma = matrix(0,na,na)
    diag(dphi.dgamma) = invgamma + invdiffgamma
    if(na > 1) dphi.dgamma[cbind(1:(na-1), 2:na)] = -invdiffgamma[-na]
    tmp <- mod$trans$deriv(x=alpha + sum(beta*x), f=mod$trans$cumprob(alpha + sum(beta*x)))
    tmp <- ifelse(tmp<1e-16, 1e-16, tmp)                     # floor the link density as well
    dgamma.base <- - tmp
    dgamma.dalpha = diagn(dgamma.base)   # diagn(): length-safe diag(), defined later in this file
    dgamma.dbeta = dgamma.base %o% x
    ## first derivatives of the log-likelihood (score functions)
    dl.dalpha = dl.dphi %*% dphi.dgamma %*% dgamma.dalpha
    dl.dbeta = dl.dphi %*% dphi.dgamma %*% dgamma.dbeta
    dl.dtheta[i,] = c(dl.dalpha, dl.dbeta)
    ## d(lower/upper cumulative prob)/d(theta); zero at the boundary categories
    if (y[i] == 1) {
      dlow.dtheta[,i] <- 0
    } else {
      dlow.dtheta[,i] <- c(dgamma.dalpha[y[i]-1,], dgamma.dbeta[y[i]-1, ])
    }
    if (y[i] == ny) {
      dhi.dtheta[,i] <- 0
    } else {
      dhi.dtheta[,i] <- -c(dgamma.dalpha[y[i],], dgamma.dbeta[y[i],])
    }
  }
  low = cbind(0, Gamma)[cbind(1:N, y)]     # P(Y < y_i | x_i)
  hi = cbind(1-Gamma, 0)[cbind(1:N, y)]    # P(Y > y_i | x_i)
  presid <- low - hi                       # probability-scale residual
  dpresid.dtheta <- dlow.dtheta - dhi.dtheta
  result <- list(dl.dtheta=dl.dtheta,
                 d2l.dtheta.dtheta = -mod$info.matrix,
                 presid=presid,
                 dpresid.dtheta = dpresid.dtheta )
  return(result)
}

## corTS: correlation of two residual vectors with an M-estimation (sandwich)
## variance built from the stacked estimating equations of both models;
## fisher=TRUE computes the p-value on Fisher's z scale instead.
corTS = function(xresid, yresid, xz.dl.dtheta, yz.dl.dtheta,
                 xz.d2l.dtheta.dtheta, yz.d2l.dtheta.dtheta,
                 dxresid.dthetax, dyresid.dthetay, fisher=FALSE){
  TS = cor(xresid, yresid)
  xresid2 = xresid^2
  yresid2 = yresid^2
  xbyyresid = xresid * yresid
  mean.xresid = mean(xresid)
  mean.yresid = mean(yresid)
  mean.xbyyresid = mean(xbyyresid)
  ## stacked estimating functions: model scores plus moment equations for the
  ## means, cross-product, and second moments entering the correlation
  bigphi = cbind(xz.dl.dtheta, yz.dl.dtheta,
                 mean.xresid - xresid,
                 mean.yresid - yresid,
                 mean.xbyyresid - xbyyresid,
                 mean(xresid2)-xresid2,
                 mean(yresid2)-yresid2,
                 0)
  npar.xz = dim(xz.dl.dtheta)[2]
  npar.yz = dim(yz.dl.dtheta)[2]
  Ntheta = npar.xz + npar.yz + 6
  N = dim(xz.dl.dtheta)[1]
  A = matrix(0,Ntheta,Ntheta)
  A[1:npar.xz, 1:npar.xz] =xz.d2l.dtheta.dtheta
  A[npar.xz+(1:npar.yz), npar.xz+(1:npar.yz)] = yz.d2l.dtheta.dtheta
  A[Ntheta-6+(1:6), Ntheta-6+(1:6)] = diag(N, 6)
  bigpartial = rbind(c(dxresid.dthetax %*% rep(1, N), rep(0, npar.yz)),
                     c(rep(0, npar.xz), dyresid.dthetay %*% rep(1, N)),
                     c(dxresid.dthetax %*% yresid, dyresid.dthetay %*% xresid),
                     c(dxresid.dthetax %*% (2*xresid), rep(0, npar.yz)),
                     c(rep(0, npar.xz), dyresid.dthetay %*% (2*yresid)))
  A[Ntheta-6+(1:5), 1:(npar.xz+npar.yz)] = -bigpartial
  ## TS also equals numTS / sqrt(varprod) = numTS * revsvp
  numTS = mean.xbyyresid - mean.xresid * mean.yresid
  var.xresid = mean(xresid2) - mean.xresid^2
  var.yresid = mean(yresid2) - mean.yresid^2
  varprod = var.xresid * var.yresid
  revsvp = 1/sqrt(varprod)
  dTS.dvarprod = numTS * (-0.5) * revsvp^3
  smallpartial = N * c(-mean.yresid * revsvp + dTS.dvarprod * (-2*mean.xresid*var.yresid),
                       -mean.xresid *
revsvp + dTS.dvarprod * (-2*mean.yresid*var.xresid),
                       revsvp,
                       dTS.dvarprod * var.yresid,
                       dTS.dvarprod * var.xresid)
  A[Ntheta, Ntheta-6+(1:5)] = -smallpartial
  A[Ntheta, Ntheta-6+(1:5)] = -smallpartial  # NOTE(review): duplicated assignment; harmless but redundant
  SS = solve(A, t(bigphi))
  var.theta = tcrossprod (SS, SS)            # sandwich variance of all stacked parameters
  varTS = var.theta[Ntheta, Ntheta]
  pvalTS = 2 * pnorm( -abs(TS)/sqrt(varTS))
  if (fisher==TRUE){
    #### Fisher's transformation
    TS_f <- log( (1+TS)/(1-TS) )
    var.TS_f <- varTS*(2/(1-TS^2))^2         # delta-method variance on the z scale
    pvalTS <- 2 * pnorm( -abs(TS_f)/sqrt(var.TS_f))
  }
  list(TS=TS,var.TS=varTS, pval.TS=pvalTS)
}

## getCI: Wald confidence interval for the correlation statistic, either on the
## raw scale or (fisher=TRUE) built on Fisher's z scale and back-transformed.
getCI <- function(ts, var, fisher, ci=0.95){
  if(!fisher){
    lower <- ts - abs(qnorm(0.5*(1-ci)))*sqrt(var)
    upper <- ts + abs(qnorm(0.5*(1-ci)))*sqrt(var)
  } else {
    ts_f <- log( (1+ts)/(1-ts) )
    var_f <- var*(2/(1-ts^2))^2
    lower_f <- ts_f - abs(qnorm(0.5*(1-ci)))*sqrt(var_f)
    upper_f <- ts_f + abs(qnorm(0.5*(1-ci)))*sqrt(var_f)
    lower <- (exp(lower_f)-1)/(1+exp(lower_f))   # inverse Fisher transform
    upper <- (exp(upper_f)-1)/(1+exp(upper_f))
  }
  return(c(lower, upper))
}

###### obtaining PSRs from two orm models
## cocobot.orm: conditional-independence test from a two-LHS formula x | y ~ z.
## Fits orm models x~z and y~z and correlates their PSRs via corTS().
cocobot.orm <- function(formula, data,
                        link.x=c("logistic", "probit", "cauchit", "loglog", "cloglog"),
                        link.y=c("logistic", "probit", "cauchit", "loglog", "cloglog"),
                        subset, na.action=getOption('na.action'),
                        fisher=FALSE,conf.int=0.95) {
  # Construct the model frames for x ~ z and y ~ z
  F1 <- Formula(formula)
  Fx <- formula(F1, lhs=1)
  Fy <- formula(F1, lhs=2)
  mf <- match.call(expand.dots = FALSE)
  m <- match(c("formula", "data", "subset", "weights", "na.action", "offset"), names(mf), 0L)
  mf <- mf[c(1L, m)]
  mf$drop.unused.levels <- TRUE
  mf$na.action <- na.action
  # We set xlev to a benign non-value in the call so that it won't get partially matched
  # to any variable in the formula. For instance a variable named 'x' could possibly get
  # bound to xlev, which is not what we want.
  mf$xlev <- integer(0)
  mf[[1L]] <- as.name("model.frame")
  mx <- my <- mf
  # NOTE: we add the opposite variable to each model frame call so that
  # subsetting occurs correctly. Later we strip them off.
mx[["formula"]] <- Fx
  yName <- all.vars(Fy[[2]])[1]
  mx[[yName]] <- Fy[[2]]
  my[["formula"]] <- Fy
  xName <- all.vars(Fx[[2]])[1]
  my[[xName]] <- Fx[[2]]
  mx <- eval(mx, parent.frame())
  mx[[paste('(',yName,')',sep='')]] <- NULL   # strip the helper column again
  my <- eval(my, parent.frame())
  my[[paste('(',xName,')',sep='')]] <- NULL
  data.points <- nrow(mx)
  # Construct the model matrix z (intercept removed; orm supplies its own)
  mxz <- model.matrix(attr(mx,'terms'),mx)
  zzint <- match("(Intercept)", colnames(mxz), nomatch = 0L)
  if(zzint > 0L) {
    mxz <- mxz[, -zzint, drop = FALSE]
  }
  myz <- model.matrix(attr(my,'terms'),my)
  zzint <- match("(Intercept)", colnames(myz), nomatch = 0L)
  if(zzint > 0L) {
    myz <- myz[, -zzint, drop = FALSE]
  }
  ## return value
  ans <- list( TS=list(), fisher=fisher, conf.int=conf.int, data.points=data.points )
  score.xz <- orm.scores(y=model.response(mx), X=mxz, link=link.x)
  score.yz <- orm.scores(y=model.response(my), X=myz, link=link.y)
  ts = corTS(score.xz$presid, score.yz$presid,
             score.xz$dl.dtheta, score.yz$dl.dtheta,
             as.matrix(score.xz$d2l.dtheta.dtheta), as.matrix(score.yz$d2l.dtheta.dtheta),
             score.xz$dpresid.dtheta, score.yz$dpresid.dtheta,fisher)
  ts.label = "PResid vs. PResid"
  ans$TS$TS <- list( ts=ts$TS, var=ts$var.TS, pval=ts$pval.TS, label = ts.label)
  ans <- structure(ans, class="cocobot")
  # Apply confidence intervals
  for (i in seq_len(length(ans$TS))){
    ts_ci <- getCI(ans$TS[[i]]$ts,ans$TS[[i]]$var,ans$fisher,conf.int)
    ans$TS[[i]]$lower <- ts_ci[1]
    ans$TS[[i]]$upper <- ts_ci[2]
  }
  ans
}

## lm.scores: score pieces for a linear model fit by OLS.  Produces the
## normal-theory PSR (presid) and an empirical PSR (presid.k) plus the
## derivatives of each with respect to (intercept, slopes).
lm.scores = function(y, X){
  N = length(y)
  mod = lm(y~X)
  smod = summary(mod)
  resid = smod$residuals
  d2l.dtheta.dtheta = -crossprod(cbind(1, X))
  dl.dtheta <- resid*cbind(1, X)
  presid = 2*pnorm((y - mod$fitted.values)/smod$sigma) -1   # PSR assuming normal errors
  dresid.dtheta = t(cbind(-1, -X))
  dpresid.dtheta = t(cbind(-2*dnorm((y - mod$fitted.values)/smod$sigma)/smod$sigma,
                           -2*dnorm((y - mod$fitted.values)/smod$sigma)/smod$sigma * X))
  f.y<-density(resid)    # kernel density of residuals for the empirical variant
  fy.ry <- NULL
  presid.k <- NULL
  for (i in 1:length(resid)){
    ## density height at the grid point nearest to resid[i]
    fy.ry[i] <- f.y$y[which(abs(f.y$x-resid[i])==min(abs(f.y$x-resid[i])))]
    ## NOTE(review): 'residresid' is a mangled token -- a comparison operator
    ## (likely '<' / '<=') was lost here; recover the original expression from
    ## version control before using presid.k.
    presid.k[i] <- sum(residresid[i])/length(resid)
  }
  dpresid.dtheta.k <- t(cbind(-2*fy.ry, -2*fy.ry*X))
  list(mod = mod, dl.dtheta = dl.dtheta, d2l.dtheta.dtheta = d2l.dtheta.dtheta,
       resid = resid, dresid.dtheta = dresid.dtheta,
       presid = presid, presid.k= presid.k,
       dpresid.dtheta = dpresid.dtheta, dpresid.dtheta.k = dpresid.dtheta.k)
}

## continuous.bot: conditional-independence test for continuous x and y from
## formula x | y ~ z, using linear-model residuals; emp=TRUE uses the
## empirical PSRs, otherwise the normal-theory PSRs.
continuous.bot <- function(formula, data, subset, na.action=getOption('na.action'),
                           emp=TRUE,fisher=FALSE,conf.int=0.95) {
  F1 <- Formula(formula)
  Fx <- formula(F1, lhs=1)
  Fy <- formula(F1, lhs=2)
  mf <- match.call(expand.dots = FALSE)
  m <- match(c("formula", "data", "subset", "weights", "na.action", "offset"), names(mf), 0L)
  mf <- mf[c(1L, m)]
  mf$drop.unused.levels <- TRUE
  mf$na.action <- na.action
  mf$xlev <- integer(0)
  mf[[1L]] <- as.name("model.frame")
  mx <- my <- mf
  mx[["formula"]] <- Fx
  yName <- all.vars(Fy[[2]])[1]
  mx[[yName]] <- Fy[[2]]
  my[["formula"]] <- Fy
  xName <- all.vars(Fx[[2]])[1]
  my[[xName]] <- Fx[[2]]
  mx <- eval(mx, parent.frame())
  mx[[paste('(',yName,')',sep='')]] <- NULL
  my <- eval(my, parent.frame())
  my[[paste('(',xName,')',sep='')]] <- NULL
  data.points <- nrow(mx)
  mxz <- model.matrix(attr(mx,'terms'),mx)
  zzint <- match("(Intercept)", colnames(mxz), nomatch = 0L)
  if(zzint > 0L) {
    mxz <- mxz[, -zzint, drop = FALSE]
  }
  myz <- model.matrix(attr(my,'terms'),my)
  zzint <- match("(Intercept)", colnames(myz), nomatch = 0L)
  if(zzint > 0L) {
    myz <- myz[, -zzint, drop = FALSE]
  }
  score.xz <- lm.scores(y=model.response(mx), X=mxz)
  score.yz <- lm.scores(y=model.response(my), X=myz)
  npar.xz = dim(score.xz$dl.dtheta)[2]
  npar.yz = dim(score.yz$dl.dtheta)[2]
  ans <- list( TS=list(), fisher=fisher, conf.int=conf.int, data.points=data.points )
  if (emp==TRUE){
    ### presid vs presid (emprical)
    tb = corTS(score.xz$presid.k, score.yz$presid.k,
               score.xz$dl.dtheta, score.yz$dl.dtheta,
               score.xz$d2l.dtheta.dtheta, score.yz$d2l.dtheta.dtheta,
               score.xz$dpresid.dtheta.k, score.yz$dpresid.dtheta.k,fisher)
    tb.label = "PResid vs. PResid (empirical)"
  } else {
    ### presid vs presid (use pdf of normal)
    tb = corTS(score.xz$presid, score.yz$presid,
               score.xz$dl.dtheta, score.yz$dl.dtheta,
               score.xz$d2l.dtheta.dtheta, score.yz$d2l.dtheta.dtheta,
               score.xz$dpresid.dtheta, score.yz$dpresid.dtheta,fisher)
    tb.label = "PResid vs. PResid (assume normality)"
  }
  ans$TS$TB <- list(ts=tb$TS, var=tb$var.TS, pval=tb$pval.TS,label = tb.label)
  ## TC: correlation of the raw (observed - expected) residuals
  tc <- corTS(score.xz$resid, score.yz$resid,
              score.xz$dl.dtheta, score.yz$dl.dtheta,
              score.xz$d2l.dtheta.dtheta, score.yz$d2l.dtheta.dtheta,
              score.xz$dresid.dtheta, score.yz$dresid.dtheta,fisher)
  ans$TS$TC <- list( ts=tc$TS, var=tc$var.TS, pval=tc$pval.TS,label = 'Obs-Exp vs. Obs-Exp')
  ans <- structure(ans, class="cocobot")
  for (i in seq_len(length(ans$TS))){
    ts_ci <- getCI(ans$TS[[i]]$ts,ans$TS[[i]]$var,ans$fisher,conf.int)
    ans$TS[[i]]$lower <- ts_ci[1]
    ans$TS[[i]]$upper <- ts_ci[2]
  }
  return(ans)
}

################################################################################################
######################### SIMULATIONS ######################################################
################################################################################################
######################### SIMULATION I ##########################################################
########## simulation for Table 1 Scenarios I and II: continuous X and continuous Y
## generate.data.1: bivariate-normal (x, y) given z with correlation rho.
## NOTE(review): mvrnorm is presumably MASS::mvrnorm; MASS is not loaded in the
## visible code -- confirm.
generate.data.1 <- function(seed, n, alpha, beta, rho){
  set.seed(seed)
  z <- rnorm(n, 0, 1)
  data <- cbind(alpha[1] + alpha[2]*z, beta[1]+beta[2]*z) + mvrnorm(n, mu=rep(0, 2), matrix(c(1, rho, rho,1), 2,2))
  data <- cbind(data, z)
  colnames(data) <- c("x", "y", "z")
  data <- data.frame(data)
  return(data)
}

####### using PSRs from the orm procedures
## sim1.psr.orm: run cocobot.orm over `sim` replicates for four link functions;
## errors inside a replicate are swallowed by try() and leave NA rows.
sim1.psr.orm <- function(sim=100, seed=1, N=200, alpha=c(1,1), beta=c(1,-1), rho=0.2){
  set.seed(seed)
  seeds <- unique(round(runif(sim*10,0,10)*sim,0))[seq(sim)]
  est.ts <- matrix(NA, nrow=sim, ncol=4)
  est.se <- matrix(NA, nrow=sim, ncol=4)
  pval <- matrix(NA, nrow=sim, ncol=4)
  lower <- matrix(NA,nrow=sim, ncol=4 )
  upper <- matrix(NA, nrow=sim, ncol=4)
  colnames(est.ts) <- colnames(est.se) <- colnames(pval) <- colnames(lower) <- colnames(upper) <- c( "probit", "logit", "loglog", "cloglog")
  #ts.lm.normal <- ts.lm.emp <- ts.probit <- ts.logit <- ts.loglog <- ts.cloglog <- ts.ks <- ts.partial.s <- ts.partial.k<- rep(NA, sim)
  for(i in 1:sim){
    try({
      data <- generate.data.1(seed=seeds[i], n=N, alpha=alpha, beta=beta, rho=rho)
      result <- cocobot.orm(x|y~z, link.x="probit", link.y="probit", data=data, fisher=TRUE)
      est.ts[i, 1] <- result$TS$TS$ts
      est.se[i, 1] <- sqrt(result$TS$TS$var)
      pval[i, 1] <- result$TS$TS$pval
      lower[i, 1] <- result$TS$TS$lower
upper[i, 1] <- result$TS$TS$upper
      result <- cocobot.orm(x|y~z, link.x="logistic",link.y="logistic",data=data, fisher=TRUE)
      est.ts[i, 2] <- result$TS$TS$ts
      est.se[i, 2] <- sqrt(result$TS$TS$var)
      pval[i, 2] <- result$TS$TS$pval
      lower[i, 2] <- result$TS$TS$lower
      upper[i, 2] <- result$TS$TS$upper
      result <- cocobot.orm(x|y~z, link.x="loglog",link.y="loglog", data=data, fisher=TRUE)
      est.ts[i, 3] <- result$TS$TS$ts
      est.se[i, 3] <- sqrt(result$TS$TS$var)
      pval[i, 3] <- result$TS$TS$pval
      lower[i, 3] <- result$TS$TS$lower
      upper[i, 3] <- result$TS$TS$upper
      result <- cocobot.orm(x|y~z, link.x="cloglog",link.y="cloglog", data=data, fisher=TRUE)
      est.ts[i, 4] <- result$TS$TS$ts
      est.se[i, 4] <- sqrt(result$TS$TS$var)
      pval[i, 4] <- result$TS$TS$pval
      lower[i, 4] <- result$TS$TS$lower
      upper[i, 4] <- result$TS$TS$upper
    })
  }
  return(list(est.ts=est.ts, est.se=est.se, pval=pval, lower=lower, upper=upper, rho=rho))
}

####### using PSRs from the linear regression models
## sim1.psr.lm: run continuous.bot over `sim` replicates; columns alternate
## (normal, empirical) PSRs for y and then for exp(y).
sim1.psr.lm <- function(sim=100, seed=1, N=200, alpha=c(1,1), beta=c(1,-1), rho=0.2){
  set.seed(seed)
  seeds <- unique(round(runif(sim*10,0,10)*sim,0))[seq(sim)]
  est.ts <- matrix(NA, nrow=sim, ncol=2*4)
  est.se <- matrix(NA, nrow=sim, ncol=2*4)
  pval <- matrix(NA, nrow=sim, ncol=2*4)
  lower <- matrix(NA,nrow=sim, ncol=2*4 )
  upper <- matrix(NA, nrow=sim, ncol=2*4)
  colnames(est.ts) <- colnames(est.se) <- colnames(pval) <- colnames(lower) <- colnames(upper) <- rep(c("normal", "emp"),4)
  #ts.lm.normal <- ts.lm.emp <- ts.probit <- ts.logit <- ts.loglog <- ts.cloglog <- ts.ks <- ts.partial.s <- ts.partial.k<- rep(NA, sim)
  for(i in 1:sim){
    try({
      data <- generate.data.1(seed=seeds[i], n=N, alpha=alpha, beta=beta, rho=rho)
      result <- continuous.bot(x|y~z, data=data, fisher=TRUE, emp=FALSE)
      est.ts[i, 1] <- result$TS$TB$ts
      est.se[i, 1] <- sqrt(result$TS$TB$var)
      pval[i, 1] <- result$TS$TB$pval
      lower[i, 1] <- result$TS$TB$lower
      upper[i, 1] <- result$TS$TB$upper
      result <- continuous.bot(x|y~z, data=data, fisher=TRUE, emp=TRUE)
      est.ts[i, 2] <-
result$TS$TB$ts
      est.se[i, 2] <- sqrt(result$TS$TB$var)
      pval[i, 2] <- result$TS$TB$pval
      lower[i, 2] <- result$TS$TB$lower
      upper[i, 2] <- result$TS$TB$upper
      ## same two fits after the monotone transformation y -> exp(y)
      result <- continuous.bot(x|exp(y)~z, data=data, fisher=TRUE, emp=FALSE)
      est.ts[i, 3] <- result$TS$TB$ts
      est.se[i, 3] <- sqrt(result$TS$TB$var)
      pval[i, 3] <- result$TS$TB$pval
      lower[i, 3] <- result$TS$TB$lower
      upper[i, 3] <- result$TS$TB$upper
      result <- continuous.bot(x|exp(y)~z, data=data, fisher=TRUE, emp=TRUE)
      est.ts[i, 4] <- result$TS$TB$ts
      est.se[i, 4] <- sqrt(result$TS$TB$var)
      pval[i, 4] <- result$TS$TB$pval
      lower[i, 4] <- result$TS$TB$lower
      upper[i, 4] <- result$TS$TB$upper
    })
  }
  return(list(est.ts=est.ts, est.se=est.se, pval=pval, lower=lower, upper=upper, rho=rho))
}

####### using PSRs from kernel methods
## nw.kernel: Nadaraya-Watson weight matrix; column i holds the normalized
## kernel weights of every observation relative to x[i].
nw.kernel <- function(x, h, kernel){
  ws <- sapply(x, FUN=function(i) kernel.function( (i-x)/h, kernel=kernel) /sum(kernel.function( (i-x)/h, kernel=kernel)))
  return(ws)
}

## kernel.PSR: kernel-smoothed probability-scale residuals.
## NOTE(review): this function is corrupted. The original text after 'sum((y'
## in the cdf.minus line was lost when this file was mangled (apparently
## everything between a '<' and a '>' was stripped). The missing chunk
## contained the remainder of kernel.PSR, the kernel-based simulation drivers,
## generate.data.2, the code that ran the simulations, and the head of
## summary.sim1 -- what follows the commented line below is summary.sim1's
## tail, spliced into this body. Recover the lost code from version control.
kernel.PSR<- function(y, z, kernel="gaussian", h="silverman"){
  if(h=="silverman"){
    h=1.06*sd(z)*length(z)^(-1/5)   # Silverman's rule-of-thumb bandwidth
  }
  wt <- nw.kernel(z, h, kernel=kernel)
  cdf <- sapply(1:length(y), FUN=function(i) sum((y<=y[i])*wt[,i]) )
  ## The next statement is the irreparably garbled splice described above; it
  ## is commented out so the rest of the file can parse:
  # cdf.minus <- sapply(1: length(y), FUN=function(i) sum((y=gamma), 2, FUN=function(x) mean(x, na.rm=TRUE) )
  power <- apply(result$pval, 2, FUN=function(x) mean(x<0.05, na.rm=TRUE))
  bias <- (est.ts-gamma)/gamma*100
  out <- rbind( format(round(gamma, 3), nsmall=3),
                format(round(est.ts,3), nsmall=3),
                format(round(bias,2), nsmall=2),
                format(round(est.se,3), nsmall=3),
                format(round(emp.se,3), nsmall=3),
                format(round(mse,4), nsmall=4),
                format(round(coverage,3), nsmall=3 ))
  return(out)
}

###### compute the population parameter in Scenarios III and IV
## NOTE(review): generate.data.2, cut.1, alpha and beta used below are defined
## in the code lost to the mangling noted above.
data <- generate.data.2(seed=1, n=10^6, alpha=c(1, 1), beta=c(1,-1), rho=0.6, cut.p = c(0, 0.2, 0.4, 0.6, 0.8, 1))
data$y.psr <- 2*pnorm(data$y, beta[1] + beta[2]*data$z, 1)-1
data$x.psr <- pnorm( cut.1[data$x+1], alpha[1]+alpha[2]*data$z, 1) + pnorm( cut.1[data$x], alpha[1]+alpha[2]*data$z, 1) -1
gamma.dis <- cor(data$x.psr, data$y.psr )   # population value of the statistic

## summary.sim2: summarize one simulation result against the discrete-case
## population parameter gamma: estimate, % bias, model/empirical SE, MSE, coverage.
summary.sim2 <- function(result, gamma=gamma.dis){
  est.ts <- apply(result$est.ts, 2, FUN=function(x) mean(x, na.rm=TRUE))
  est.se <- apply(result$est.se, 2, FUN=function(x) mean(x, na.rm=TRUE))
  emp.se <- apply(result$est.ts, 2, FUN=function(x) sd(x, na.rm=TRUE))
  mse <- apply( (result$est.ts -gamma)^2, 2, FUN=function(x) mean(x, na.rm=TRUE))
  coverage <- apply( (result$lower <=gamma)*(result$upper>=gamma), 2, FUN=function(x) mean(x, na.rm=TRUE) )
  power <- apply(result$pval, 2, FUN=function(x) mean(x<0.05, na.rm=TRUE))
  bias <- (est.ts-gamma)/gamma*100
  out <- rbind( format(round(gamma, 3), nsmall=3),
                format(round(est.ts,3), nsmall=3),
                format(round(bias,2), nsmall=2),
                format(round(est.se,3), nsmall=3),
                format(round(emp.se,3), nsmall=3),
                format(round(mse,4), nsmall=4),
                format(round(coverage,3), nsmall=3 ))
  return(out)
}

out.psr.lm.1 <- summary.sim1(result.psr.lm.12)
out.psr.orm.1 <- summary.sim1(result.psr.orm.12)
out.ks.1 <- summary.sim1(result.ks.12)
out.psr.lm.2 <- summary.sim2(result.psr.lm.34, gamma.dis)
out.psr.orm.2 <- summary.sim2(result.psr.orm.34, gamma.dis)
out.ks.2 <- summary.sim2(result.ks.34, gamma.dis)
##### scenario 1: (x, y)|z ~ MVN
out.1 <- cbind(out.psr.lm.1[, c(1,2)], out.ks.1, out.psr.orm.1)
##### scenario 2: (x, y)|z ~ MVN; y= exp(y)
out.2 <- cbind(out.psr.lm.1[, c(3,4)], out.ks.1, out.psr.orm.1)
##### scenario 3: (x, y)|z ~ MVN, x is discreted
out.3 <- cbind(out.psr.lm.2[, c(1,2)], out.ks.2, out.psr.orm.2)
##### scenario 4: (x, y)|z ~ MVN, x is discreted, y= exp(y)
out.4 <- cbind(out.psr.lm.2[, c(3,4)], out.ks.2, out.psr.orm.2)
####### reformat for printing ######
out.1 <- t(out.1)
out.2 <- t(out.2)
out.3 <- t(out.3)
out.4 <- t(out.4)
out.1[3,] <- ifelse(out.1[3,]=="NaN","---", out.1[3,])
out.2[3,] <- ifelse(out.2[3,]=="NaN","---", out.2[3,])
out.3[3,] <- ifelse(out.3[3,]=="NaN","---", out.3[3,])
out.4[3,] <- ifelse(out.4[3,]=="NaN","---", out.4[3,])
out.1 <- rbind(rep("", 7),
out.1[1:2,],
               rep("", 7),
               out.1[3,],
               rep("", 7),
               out.1[4:7,])
## label the method-variant rows, indent them, then name the blank group rows
rownames(out.1)[2] <- "normality"
rownames(out.1)[5] <- "Silverman"
rownames(out.1)[which(rownames(out.1)!="" )] <- paste("~ ~ ~ ~ ~ ~ ",rownames(out.1)[which(rownames(out.1)!="" )], sep="")
rownames(out.1)[which(rownames(out.1)=="")] <- c("LM", "kernel", "orm")
out.2 <- rbind(rep("", 7), out.2[1:2,], rep("", 7), out.2[3,], rep("", 7), out.2[4:7,])
rownames(out.2)[2] <- "normality"
rownames(out.2)[5] <- "Silverman"
rownames(out.2)[which(rownames(out.2)!="" )] <- paste("~ ~ ~ ~ ~ ~ ",rownames(out.2)[which(rownames(out.2)!="" )], sep="")
rownames(out.2)[which(rownames(out.2)=="")] <- c("LM", "kernel", "orm")
out.3 <- rbind(rep("", 7), out.3[1:2,], rep("", 7), out.3[3,], rep("", 7), out.3[4:7,])
rownames(out.3)[2] <- "normality"
rownames(out.3)[5] <- "Silverman"
rownames(out.3)[which(rownames(out.3)!="" )] <- paste("~ ~ ~ ~ ~ ~ ",rownames(out.3)[which(rownames(out.3)!="" )], sep="")
rownames(out.3)[which(rownames(out.3)=="")] <- c("LM", "kernel", "orm")
out.4 <- rbind(rep("", 7), out.4[1:2,], rep("", 7), out.4[3,], rep("", 7), out.4[4:7,])
rownames(out.4)[2] <- "normality"
rownames(out.4)[5] <- "Silverman"
rownames(out.4)[which(rownames(out.4)!="" )] <- paste("~ ~ ~ ~ ~ ~ ",rownames(out.4)[which(rownames(out.4)!="" )], sep="")
rownames(out.4)[which(rownames(out.4)=="")] <- c("LM", "kernel", "orm")
## stack the four scenarios into one table and emit LaTeX (Hmisc::latex)
out <- rbind(rep("", 7), out.1, rep("", 7), out.2, rep("",7), out.3, rep("", 7), out.4)
rownames(out)[which(rownames(out)!="" )] <- paste(" ~ ~ ~ ~ ~ ~",rownames(out)[which(rownames(out)!="" )], sep="")
rownames(out)[which(rownames(out)=="" )] <- c("I", "II", "III", "IV")
latex(out, file="",size='scriptsize', caption="", rowlabel='Scenarios')

######################### SIMULATION II ###########################################################
####### Test based on our partial estimators
sim=10^4
seed=1
n=200
alpha=c(1, 1)
beta=c(1, -1)
result2.psr.orm.12.H0 <- sim1.psr.orm (sim=sim, seed=seed, N=n, alpha=alpha,
beta=beta, rho=0)
result2.psr.orm.12.H1 <- sim1.psr.orm (sim=sim, seed=seed, N=n, alpha=alpha, beta=beta, rho=0.2)
## NOTE(review): sim2.psr.orm is defined in the code lost to the mangling in
## the kernel section above.
result2.psr.orm.34.H0 <- sim2.psr.orm (sim=sim, seed=seed, N=n, alpha=alpha, beta=beta, rho=0)
result2.psr.orm.34.H1 <- sim2.psr.orm (sim=sim, seed=seed, N=n, alpha=alpha, beta=beta, rho=0.2)

##### Test based on traditional partial correlation coefficients
library(ppcor)

## sim1.lm.par: partial correlations (Pearson/Spearman/Kendall) on continuous
## data from generate.data.1, for y and for exp(y).
sim1.lm.par <- function(sim=100, seed=1, N=200, alpha=c(1,1), beta=c(1,-1), rho=0.2){
  set.seed(seed)
  seeds <- unique(round(runif(sim*10,0,10)*sim,0))[seq(sim)]
  est.ts <- matrix(NA, nrow=sim, ncol=6)
  pval <- matrix(NA, nrow=sim, ncol=6)
  colnames(est.ts)<- colnames(pval) <- rep(c("pearson", "spearman", "kendall"), 2)
  for(i in 1:sim){
    try({
      data <- generate.data.1(seed=seeds[i], n=N, alpha=alpha, beta=beta, rho=rho)
      result <- pcor.test(data$x, data$y, data$z, method="pearson")
      est.ts[i, 1] <- result$estimate
      pval[i, 1] <- result$p.value
      result <- pcor.test(data$x, data$y, data$z, method="spearman")
      est.ts[i, 2] <- result$estimate
      pval[i, 2] <- result$p.value
      result <- pcor.test(data$x, data$y, data$z, method="kendall")
      est.ts[i, 3] <- result$estimate
      pval[i, 3] <- result$p.value
      #### exp(y)
      result <- pcor.test(data$x, exp(data$y), data$z, method="pearson")
      est.ts[i, 4] <- result$estimate
      pval[i, 4] <- result$p.value
      result <- pcor.test(data$x, exp(data$y), data$z, method="spearman")
      est.ts[i, 5] <- result$estimate
      pval[i, 5] <- result$p.value
      result <- pcor.test(data$x, exp(data$y), data$z, method="kendall")
      est.ts[i, 6] <- result$estimate
      pval[i, 6] <- result$p.value
    })
  }
  ## NOTE(review): est.se, lower and upper are never assigned in this function;
  ## evaluating this list will error (or pick up globals) -- apparently copied
  ## from sim1.psr.lm. Only est.ts, pval and rho are meaningful here.
  return(list(est.ts=est.ts, est.se=est.se, pval=pval, lower=lower, upper=upper, rho=rho))
}

## sim2.lm.par: as sim1.lm.par but on generate.data.2 (discretized x).
sim2.lm.par <- function(sim=100, seed=1, N=200, alpha=c(1,1), beta=c(1,-1), rho=0.2){
  set.seed(seed)
  seeds <- unique(round(runif(sim*10,0,10)*sim,0))[seq(sim)]
  est.ts <- matrix(NA, nrow=sim, ncol=6)
  pval <- matrix(NA, nrow=sim, ncol=6)
  colnames(est.ts)<- colnames(pval) <- rep(c("pearson", "spearman", "kendall"), 2)
  for(i in 1:sim){
    try({
      data <- generate.data.2(seed=seeds[i], n=N, alpha=alpha, beta=beta, rho=rho)
      result <- pcor.test(data$x, data$y, data$z, method="pearson")
      est.ts[i, 1] <- result$estimate
      pval[i, 1] <- result$p.value
      result <- pcor.test(data$x, data$y, data$z, method="spearman")
      est.ts[i, 2] <- result$estimate
      pval[i, 2] <- result$p.value
      result <- pcor.test(data$x, data$y, data$z, method="kendall")
      est.ts[i, 3] <- result$estimate
      pval[i, 3] <- result$p.value
      #### exp(y)
      result <- pcor.test(data$x, exp(data$y), data$z, method="pearson")
      est.ts[i, 4] <- result$estimate
      pval[i, 4] <- result$p.value
      result <- pcor.test(data$x, exp(data$y), data$z, method="spearman")
      est.ts[i, 5] <- result$estimate
      pval[i, 5] <- result$p.value
      result <- pcor.test(data$x, exp(data$y), data$z, method="kendall")
      est.ts[i, 6] <- result$estimate
      pval[i, 6] <- result$p.value
    })
  }
  ## NOTE(review): same undefined-variable issue as sim1.lm.par above.
  return(list(est.ts=est.ts, est.se=est.se, pval=pval, lower=lower, upper=upper, rho=rho))
}

result2.lm.par.12.H0 <- sim1.lm.par (sim=sim, seed=seed, N=n, alpha=alpha, beta=beta, rho=0)
result2.lm.par.12.H1 <- sim1.lm.par (sim=sim, seed=seed, N=n, alpha=alpha, beta=beta, rho=0.2)
result2.lm.par.34.H0 <- sim2.lm.par (sim=sim, seed=seed, N=n, alpha=alpha, beta=beta, rho=0)
result2.lm.par.34.H1 <- sim2.lm.par (sim=sim, seed=seed, N=n, alpha=alpha, beta=beta, rho=0.2)

## summarize the result in Table 2
## summary.power: column-wise rejection rate at the 0.05 level.
summary.power <- function(result){
  power <- apply(result$pval, 2, FUN=function(x) mean(x<0.05, na.rm=TRUE))
  return(power)
}

out2.lm.par.12.H0 <- summary.power(result2.lm.par.12.H0)
out2.lm.par.12.H1 <- summary.power(result2.lm.par.12.H1)
out2.lm.par.34.H0 <- summary.power(result2.lm.par.34.H0)
out2.lm.par.34.H1 <- summary.power(result2.lm.par.34.H1)
## NOTE(review): the next four calls use base summary(); summary.power() was
## probably intended, to match the lm.par lines above -- confirm.
out2.psr.orm.12.H0 <- summary(result2.psr.orm.12.H0)
out2.psr.orm.12.H1 <- summary(result2.psr.orm.12.H1)
out2.psr.orm.34.H0 <- summary(result2.psr.orm.34.H0)
out2.psr.orm.34.H1 <- summary(result2.psr.orm.34.H1)
out.1 <- rbind( c(out2.psr.orm.12.H0, out2.lm.par.12.H0[1:3]),
c(out2.psr.orm.12.H1, out2.lm.par.12.H1[1:3]) )
out.2 <- rbind( c(out2.psr.orm.12.H0, out2.lm.par.12.H0[4:6]),
                c(out2.psr.orm.12.H1, out2.lm.par.12.H1[4:6]) )
out.3 <- rbind( c(out2.psr.orm.34.H0, out2.lm.par.34.H0[1:3]),
                c(out2.psr.orm.34.H1, out2.lm.par.34.H1[1:3]) )
out.4 <- rbind( c(out2.psr.orm.34.H0, out2.lm.par.34.H0[4:6]),
                c(out2.psr.orm.34.H1, out2.lm.par.34.H1[4:6]) )
out <- rbind(rep("", 6), out.1, rep("", 6), out.2, rep("", 6), out.3, rep("", 6), out.4)
out <- cbind(rep(c("","$H_0$", "$H_1$"), 4), out)
rownames(out) <- c("I", "", "","II", "", "","III", "", "","IV", "", "")
## NOTE(review): n.cgroup = c(1,4,3) implies 8 columns but colheads lists only
## 7 labels ("Kendall" appears to be missing) -- confirm the intended table.
latex(out, file="", cgroup=c("","correlation of PSRs ", "Partial correlations"), n.cgroup=c(1,4,3),
      colheads=c("","probit","logit","loglog","cloglog", "Pearson","Spearman"),
      rowlabel='Scenarios',size='scriptsize', caption="")

#########################################################################
################## APPLICATION EXAMPLES ###################
#########################################################################
################## SCIP SURVEY ###################
###### need to handle missing in SCIP survey
load("cleaned_scip.RData")
library(rms)

## diagn: diag() that is safe for length-one input (plain diag(x) would turn a
## scalar into an identity matrix of that size).
diagn <- function(x=1, nrow=length(x), ncol=nrow) if(is.matrix(x)) diag(x=x) else diag(x=x, nrow=nrow, ncol=ncol)

## orm.scores: NOTE(review) -- this redefines the orm.scores created at the top
## of this file; the two definitions appear to be identical.
orm.scores <- function (y, X, link=c("logistic", "probit", "cauchit", "loglog", "cloglog")) {
  y <- as.numeric(factor(y))
  if(!is.matrix(X)) X = matrix(X, ncol=1)
  N = length(y)
  ny = length(table(y))
  na = ny - 1
  nb = ncol(X)
  npar = na + nb
  Z = outer(y, 1:ny, "<=")
  mod <- orm(y~X, family=link[1], x=TRUE, y=TRUE)
  alpha = mod$coeff[1:na]
  beta = mod$coeff[-(1:na)]
  dl.dtheta = matrix(0, N, npar)
  Gamma = matrix(0,N,na)
  dlow.dtheta = dhi.dtheta =dpij.dtheta= matrix(, npar, N)
  for (i in 1:N) {
    z = Z[i,] ## z has length ny
    x = X[i,]
    gamma <-1- mod$trans$cumprob(alpha + sum(beta*x))
    diffgamma = diff(c(gamma,1))
    diffgamma <- ifelse(diffgamma<1e-16, 1e-16, diffgamma)
    invgamma = 1/gamma
    invgamma <- ifelse(invgamma==Inf, 1e16,
invgamma)
    invgamma2 = invgamma^2
    invdiffgamma = 1/diffgamma
    invdiffgamma2 = invdiffgamma^2
    phi = log(gamma / diffgamma) ## phi has length na
    Gamma[i,] = gamma
    dg.dphi = 1 - 1/(1 + exp(phi))
    ## l is the log likelihood (6.3) in McCullagh (1980)
    dl.dphi = z[-ny] - z[-1] * dg.dphi
    t.dl.dphi = t(dl.dphi)
    dphi.dgamma = matrix(0,na,na)
    diag(dphi.dgamma) = invgamma + invdiffgamma
    if(na > 1) dphi.dgamma[cbind(1:(na-1), 2:na)] = -invdiffgamma[-na]
    tmp <- mod$trans$deriv(x=alpha + sum(beta*x), f=mod$trans$cumprob(alpha + sum(beta*x)))
    tmp <- ifelse(tmp<1e-16, 1e-16, tmp)
    dgamma.base <- - tmp
    #dgamma.base <- - mod$trans$deriv(x=alpha + sum(beta*x), f=mod$trans$cumprob(alpha + sum(beta*x)))
    dgamma.dalpha = diagn(dgamma.base)
    dgamma.dbeta = dgamma.base %o% x
    #dgamma.dtheta[i,,] = cbind(dgamma.dalpha, dgamma.dbeta)
    #### First derivatives of log-likelihood (score functions)
    dl.dalpha = dl.dphi %*% dphi.dgamma %*% dgamma.dalpha
    dl.dbeta = dl.dphi %*% dphi.dgamma %*% dgamma.dbeta
    dl.dtheta[i,] = c(dl.dalpha, dl.dbeta)
    if (y[i] == 1) {
      dlow.dtheta[,i] <- 0
    } else {
      dlow.dtheta[,i] <- c(dgamma.dalpha[y[i]-1,], dgamma.dbeta[y[i]-1, ])
    }
    if (y[i] == ny) {
      dhi.dtheta[,i] <- 0
    } else {
      dhi.dtheta[,i] <- -c(dgamma.dalpha[y[i],], dgamma.dbeta[y[i],])
    }
  }
  low = cbind(0, Gamma)[cbind(1:N, y)]
  hi = cbind(1-Gamma, 0)[cbind(1:N, y)]
  presid <- low - hi
  dpresid.dtheta <- dlow.dtheta - dhi.dtheta
  result <- list(dl.dtheta=dl.dtheta, d2l.dtheta.dtheta = -mod$info.matrix, presid=presid, dpresid.dtheta = dpresid.dtheta )
  return(result)
}

## logit.scores: scores, second derivatives, and PSR pieces for a
## proportional-odds (logit-link) ordinal model fit with rms::lrm().
logit.scores = function(y, X) {
  y <- as.numeric(factor(y))
  if(!is.matrix(X)) X = matrix(X, ncol=1)
  ## N: number of subjects; ny: number of y categories
  N = length(y)
  ny = length(table(y))
  ## na, nb: number of parameters in alpha and beta
  na = ny - 1
  nb = ncol(X)
  npar = na + nb
  ## Z is defined as in McCullagh (1980) JRSSB 42:109-142
  Z = outer(y, 1:ny, "<=")
  ## Fit proportional odds model and obtain the MLEs of parameters.
mod = lrm(y ~ X, tol=1e-50, maxit=100)
  alpha = -mod$coeff[1:na]    # sign flipped relative to lrm's coefficients
  beta = -mod$coeff[-(1:na)]
  ## Scores are stored for individuals.
  dl.dtheta = matrix(NA, N, npar)
  ## Information matrices are stored as sums over all individuals.
  d2l.dalpha.dalpha = matrix(0,na,na)
  d2l.dalpha.dbeta = matrix(0,na,nb)
  d2l.dbeta.dbeta = matrix(0,nb,nb)
  d2l.dbeta.dalpha = matrix(0,nb,na)
  ## Predicted probabilities p0 and dp0.dtheta are stored for individuals.
  p0 = matrix(,N,ny)
  dp0.dtheta = array(0,c(N,ny,npar))
  ## Cumulative probabilities
  Gamma = matrix(0,N,na)
  dgamma.dtheta = array(0,c(N,na,npar))
  for (i in 1:N) {
    z = Z[i,] ## z has length ny
    x = X[i,]
    ## gamma and phi are defined as in McCullagh (1980)
    gamma = 1 - 1/(1 + exp(alpha + sum(beta*x))) ## gamma has length na
    diffgamma = diff(c(gamma,1))
    diffgamma <- ifelse(diffgamma<1e-16, 1e-16, diffgamma)
    invgamma = 1/gamma
    invgamma2 = invgamma^2
    invdiffgamma = 1/diffgamma
    invgamma <- ifelse(invgamma==Inf, 1e16, invgamma)
    invdiffgamma2 = invdiffgamma^2
    phi = log(gamma / diffgamma) ## phi has length na
    Gamma[i,] = gamma
    #### Some intermediate derivatives
    ## g(phi) = log(1+exp(phi))
    dg.dphi = 1 - 1/(1 + exp(phi))
    ## l is the log likelihood (6.3) in McCullagh (1980)
    dl.dphi = z[-ny] - z[-1] * dg.dphi
    t.dl.dphi = t(dl.dphi)
    ## dphi.dgamma is a na*na matrix with rows indexed by phi
    ## and columns indexed by gamma
    dphi.dgamma = matrix(0,na,na)
    diag(dphi.dgamma) = invgamma + invdiffgamma
    if(na > 1) dphi.dgamma[cbind(1:(na-1), 2:na)] = -invdiffgamma[-na]
    dgamma.base = gamma * (1-gamma)   # logistic density at the cutpoints
    dgamma.base <- ifelse(dgamma.base<1e-16, 1e-16, dgamma.base)
    dgamma.dalpha = diagn(dgamma.base)
    dgamma.dbeta = dgamma.base %o% x
    dgamma.dtheta[i,,] = cbind(dgamma.dalpha, dgamma.dbeta)
    d2gamma.base = gamma * (1-gamma) * (1-2*gamma)
    ##
    d2l.dphi.dphi = diagn(-z[-1] * dg.dphi * (1-dg.dphi))
    d2l.dphi.dalpha = d2l.dphi.dphi %*% dphi.dgamma %*% dgamma.dalpha
    d2l.dphi.dbeta = d2l.dphi.dphi %*% dphi.dgamma %*% dgamma.dbeta
    ##
    d2phi.dgamma.dalpha = array(0,c(na,na,na))
    d2phi.dgamma.dalpha[cbind(1:na,1:na,1:na)] = (-invgamma2 + invdiffgamma2) * dgamma.base
    if(na > 1) {
      d2phi.dgamma.dalpha[cbind(1:(na-1),1:(na-1),2:na)] = -invdiffgamma2[-na] * dgamma.base[-1]
      d2phi.dgamma.dalpha[cbind(1:(na-1),2:na,1:(na-1))] = -invdiffgamma2[-na] * dgamma.base[-na]
      d2phi.dgamma.dalpha[cbind(1:(na-1),2:na,2:na)] = invdiffgamma2[-na] * dgamma.base[-1]
    }
    ##
    d2phi.dgamma.dbeta = array(0,c(na,na,nb))
    rowdiff = matrix(0,na,na)
    diag(rowdiff) = 1
    if(na > 1) rowdiff[cbind(1:(na-1),2:na)] = -1
    d2phi.dgamma.dbeta.comp1 = diagn(-invdiffgamma2) %*% rowdiff %*% dgamma.dbeta
    d2phi.dgamma.dbeta.comp2 = diagn(-invgamma2) %*% dgamma.dbeta - d2phi.dgamma.dbeta.comp1
    for(j in 1:na) {
      d2phi.dgamma.dbeta[j,j,] = d2phi.dgamma.dbeta.comp2[j,]
      if(j < na) d2phi.dgamma.dbeta[j,j+1,] = d2phi.dgamma.dbeta.comp1[j,]
    }
    ##
    d2gamma.dalpha.dbeta = array(0,c(na,na,nb))
    for(j in 1:na) d2gamma.dalpha.dbeta[j,j,] = d2gamma.base[j] %o% x
    ##
    d2gamma.dbeta.dbeta = d2gamma.base %o% x %o% x
    #### First derivatives of log-likelihood (score functions)
    dl.dalpha = dl.dphi %*% dphi.dgamma %*% dgamma.dalpha
    dl.dbeta = dl.dphi %*% dphi.dgamma %*% dgamma.dbeta
    dl.dtheta[i,] = c(dl.dalpha, dl.dbeta)
    #### Second derivatives of log-likelihood
    #### Since first derivative is a sum of terms each being a*b*c,
    #### second derivative is a sum of terms each being (a'*b*c+a*b'*c+a*b*c').
    #### d2l.dalpha.dalpha
    ## Obtain aprime.b.c
    ## Transpose first so that matrix multiplication is meaningful.
    ## Then transpose so that column is indexed by second alpha.
    aprime.b.c = t(crossprod(d2l.dphi.dalpha, dphi.dgamma %*% dgamma.dalpha))
    ## Obtain a.bprime.c
    ## run through the index of second alpha
    a.bprime.c = matrix(,na,na)
    for(j in 1:na) a.bprime.c[,j] = t.dl.dphi %*% d2phi.dgamma.dalpha[,,j] %*% dgamma.dalpha
    ## Obtain a.b.cprime
    ## cprime = d2gamma.dalpha.dalpha = 0 if indices of the two alphas differ.
    d2gamma.dalpha.dalpha = diagn(d2gamma.base)
    a.b.cprime = diagn(as.vector(dl.dphi %*% dphi.dgamma %*% d2gamma.dalpha.dalpha))
    ## summing over individuals
    d2l.dalpha.dalpha = aprime.b.c + a.bprime.c + a.b.cprime + d2l.dalpha.dalpha
    #### d2l.dalpha.dbeta
    aprime.b.c = t(crossprod(d2l.dphi.dbeta, dphi.dgamma %*% dgamma.dalpha))
    a.bprime.c = a.b.cprime = matrix(,na,nb)
    for(j in 1:nb) {
      a.bprime.c[,j] = t.dl.dphi %*% d2phi.dgamma.dbeta[,,j] %*% dgamma.dalpha
      a.b.cprime[,j] = t.dl.dphi %*% dphi.dgamma %*% d2gamma.dalpha.dbeta[,,j]
    }
    d2l.dalpha.dbeta = aprime.b.c + a.bprime.c + a.b.cprime + d2l.dalpha.dbeta
    #### d2l.dbeta.dbeta
    aprime.b.c = t(crossprod(d2l.dphi.dbeta, dphi.dgamma %*% dgamma.dbeta))
    a.bprime.c = a.b.cprime = matrix(,nb,nb)
    for(j in 1:nb) {
      a.bprime.c[,j] = t.dl.dphi %*% d2phi.dgamma.dbeta[,,j] %*% dgamma.dbeta
      a.b.cprime[,j] = t.dl.dphi %*% dphi.dgamma %*% d2gamma.dbeta.dbeta[,,j]
    }
    d2l.dbeta.dbeta = aprime.b.c + a.bprime.c + a.b.cprime + d2l.dbeta.dbeta
  }
  #### Final assembly
  d2l.dtheta.dtheta = rbind( cbind(d2l.dalpha.dalpha, d2l.dalpha.dbeta),
                             cbind(t(d2l.dalpha.dbeta), d2l.dbeta.dbeta))
  npar.z = dim(dl.dtheta)[2]
  dlow.dtheta = dhi.dtheta = matrix(, npar.z, N)
  for(i in 1:N) {
    if (y[i] == 1) {
      dlow.dtheta[,i] <- 0
    } else {
      dlow.dtheta[,i] <- dgamma.dtheta[i,y[i]-1,]
    }
    if (y[i] == ny) {
      dhi.dtheta[,i] <- 0
    } else {
      dhi.dtheta[,i] <- -dgamma.dtheta[i,y[i],]
    }
  }
  low = cbind(0, Gamma)[cbind(1:N, y)]
  hi = cbind(1-Gamma, 0)[cbind(1:N, y)]
  presid <- low - hi
  dpresid.dtheta <- dlow.dtheta - dhi.dtheta
  ## Note: the assembled d2l.dtheta.dtheta above is not what is returned; the
  ## fitted model's info.matrix is used instead.
  result <- list(dl.dtheta=dl.dtheta, d2l.dtheta.dtheta = -mod$info.matrix, presid=presid, dpresid.dtheta = dpresid.dtheta )
  return(result)
}

## corTS.NA: corTS generalized to incomplete data via the observed-data
## indicators ind.x/ind.y (1 = observed); residual entries for incompletely
## observed pairs are zeroed out of the estimating equations.
## NOTE(review): this function is truncated -- the visible text ends mid-body;
## the remainder lies outside this file chunk.
corTS.NA <- function(ind.x, ind.y, presid.x, presid.y, dl.dtheta.x, dl.dtheta.y,
                     d2l.dtheta.dtheta.x, d2l.dtheta.dtheta.y,
                     dpresid.dtheta.x, dpresid.dtheta.y, fisher=TRUE){
  ind <- ind.x*ind.y    # 1 iff both x and y are observed
  n <- length(ind)
  npar.xz = dim(dl.dtheta.x)[2]
  npar.yz = dim(dl.dtheta.y)[2]
  Ntheta = npar.xz + npar.yz + 6
  ## scatter the observed-case scores into full-length (n-row) matrices
  xz.dl.dtheta <- matrix(0, n, npar.xz)
  xz.dl.dtheta[which(ind.x==1), ] <- dl.dtheta.x
  yz.dl.dtheta <- matrix(0, n, npar.yz)
  yz.dl.dtheta[which(ind.y==1), ] <- dl.dtheta.y
  xz.d2l.dtheta.dtheta <- as.matrix(d2l.dtheta.dtheta.x)
  yz.d2l.dtheta.dtheta <- as.matrix(d2l.dtheta.dtheta.y)
  xresid <- rep(0, n)
  yresid <- rep(0, n)
  xresid[which(ind.x==1)] <- presid.x
  xresid[which(ind==0)] <- 0
  yresid[which(ind.y==1)] <- presid.y
  yresid[which(ind==0)] <- 0
  dxresid.dthetax <- matrix(0, npar.xz, n)
  dyresid.dthetay <- matrix(0, npar.yz, n)
  dxresid.dthetax[, which(ind.x==1)] <- dpresid.dtheta.x
  dxresid.dthetax[, which(ind==0)] <- 0
  dyresid.dthetay[, which(ind.y==1)] <- dpresid.dtheta.y
  ## NOTE(review): asymmetric with the x side above, which zeroes columns where
  ## ind==0; here ind.y==0 is used -- confirm which is intended.
  dyresid.dthetay[, which(ind.y==0)] <- 0
  TS = cor(xresid[which(ind==1)], yresid[which(ind==1)])
  xresid2 = xresid^2
  yresid2 = yresid^2
  xbyyresid <- xresid * yresid
  ## moments computed over complete pairs only
  mean.xresid <- mean(xresid[which(ind==1)])
  mean.yresid <- mean(yresid[which(ind==1)])
  mean.xbyyresid = mean(xbyyresid[which(ind==1)])
  mean.xresid2 <- mean(xresid[which(ind==1)]^2)
  mean.yresid2 <- mean(yresid[which(ind==1)]^2)
  bigphi <- cbind(xz.dl.dtheta, yz.dl.dtheta,
                  (mean.xresid - xresid)*ind,
                  (mean.yresid - yresid)*ind,
                  (mean.xbyyresid - xbyyresid)*ind,
                  (mean.xresid2 - xresid2)*ind,
                  (mean.yresid2 - yresid2)*ind,
                  0)
  A = matrix(0,Ntheta,Ntheta)
  A[1:npar.xz, 1:npar.xz] <- xz.d2l.dtheta.dtheta
  A[npar.xz+(1:npar.yz), npar.xz+(1:npar.yz)] = yz.d2l.dtheta.dtheta
  A[Ntheta-6+(1:6), Ntheta-6+(1:6)] = diag(sum(ind), 6)
  bigpartial = rbind(c( dxresid.dthetax %*% ind, rep(0, npar.yz)),
                     c( rep(0, npar.xz), dyresid.dthetay %*% ind ),
                     c(dxresid.dthetax %*% yresid, dyresid.dthetay %*% xresid),
                     c(dxresid.dthetax %*% (2*xresid), rep(0, npar.yz)),
                     c(rep(0, npar.xz), dyresid.dthetay %*% (2*yresid)))
  A[Ntheta-6+(1:5), 1:(npar.xz+npar.yz)] = -bigpartial
  ## TS also equals numTS / sqrt(varprod) = numTS * revsvp
  numTS = mean.xbyyresid - mean.xresid * mean.yresid
  var.xresid = mean.xresid2 - mean.xresid^2
  var.yresid = mean.yresid2 - mean.yresid^2
  varprod = var.xresid * var.yresid
  revsvp = 1/sqrt(varprod)
dTS.dvarprod = numTS * (-0.5) * revsvp^3 smallpartial = sum(ind)*c(-mean.yresid * revsvp + dTS.dvarprod * (-2*mean.xresid*var.yresid), -mean.xresid * revsvp + dTS.dvarprod * (-2*mean.yresid*var.xresid), revsvp, dTS.dvarprod * var.yresid, dTS.dvarprod * var.xresid) A[Ntheta, Ntheta-6+(1:5)] = -smallpartial SS = solve(A, t(bigphi)) var.theta = tcrossprod (SS, SS) varTS = var.theta[Ntheta, Ntheta] pvalTS = 2 * pnorm( -abs(TS)/sqrt(varTS)) if (fisher==TRUE){ ####Fisher's transformation TS_f <- log( (1+TS)/(1-TS) ) var.TS_f <- varTS*(2/(1-TS^2))^2 pvalTS <- 2 * pnorm( -abs(TS_f)/sqrt(var.TS_f)) } result <- c(TS, varTS, pvalTS) names(result) <- c("TS", "var.TS", "pval.TS") return(result) } index <- matrix(NA, dim(d2)[1], dim(d2)[2]) for(i in 1:dim(index)[2]){ index[,i] <- !is.na(d2[,i]) } Sys.time() presid <- dl.dtheta <- d2l.dtheta.dtheta <- dpresid.dtheta <- list() for (i in 1:dim(d2)[2]){ tryCatch({ d.tmp <- data.frame(d2[,i], adj.var) colnames(d.tmp)[1] <- colnames(d2)[i] d.tmp.c <- d.tmp[complete.cases(d.tmp), ] X=model.matrix(~ urban + rcs(age) + marital + religion + language, d.tmp.c) #mod <- orm(d2[index[,i],i]~ urban + age + marital + religion + language, data=d.tmp.c ) #X=model.matrix(~ urban, d.tmp.c) zzint <- match("(Intercept)", colnames(X), nomatch = 0L) if(zzint > 0L) { X <- X[, -zzint, drop = FALSE] } if(length(unique(d2[index[,i],i]))>2 ){ score <- orm.scores(d.tmp.c[,1], X) dl.dtheta[[i]] <- score$dl.dtheta d2l.dtheta.dtheta[[i]] <- score$d2l.dtheta.dtheta presid[[i]] <- score$presid dpresid.dtheta[[i]] <- score$dpresid.dtheta } else if(length(unique(d2[index[,i],i]))==2){ score <- logit.scores(d.tmp.c[,1], X) dl.dtheta[[i]] <- score$dl.dtheta d2l.dtheta.dtheta[[i]] <- score$d2l.dtheta.dtheta presid[[i]] <- score$presid dpresid.dtheta[[i]] <- score$dpresid.dtheta } }, error=function(e) print(paste(i, colnames(d2)[i],":", e)) ) } Sys.time() ts.est <- ts.var <- ts.pval <- matrix(NA, nrow=dim(d2)[2], ncol=dim(d2)[2]) for(i in 1: (dim(d2)[2]-1)){ for (j in 
(i+1):dim(d2)[2]){ tryCatch({ result <- corTS.NA(ind.x=index[,i], ind.y=index[,j], presid.x=presid[[i]], presid.y=presid[[j]], dl.dtheta.x=dl.dtheta[[i]], dl.dtheta.y=dl.dtheta[[j]], d2l.dtheta.dtheta.x=d2l.dtheta.dtheta[[i]], d2l.dtheta.dtheta.y=d2l.dtheta.dtheta[[j]], dpresid.dtheta.x=dpresid.dtheta[[i]], dpresid.dtheta.y=dpresid.dtheta[[j]], fisher=TRUE) ts.est[i, j] <- result[1] ts.var[i, j] <- result[2] ts.pval[i, j] <- result[3] }, error=function(e) print(paste(i, "-", j, ":", e)) ) } print(paste(i, " :",Sys.time())) } diag(ts.est) <- rep(1, dim(ts.est)[2]) ts.est[lower.tri(ts.est)] <- t(ts.est)[lower.tri(ts.est)] diag(ts.var) <- rep(1, dim(ts.var)[2]) ts.var[lower.tri(ts.var)] <- t(ts.var)[lower.tri(ts.var)] diag(ts.pval) <- rep(1, dim(ts.pval)[2]) ts.pval[lower.tri(ts.pval)] <- t(ts.pval)[lower.tri(ts.pval)] ci=0.95 ts_f <- log( (1+ts.est)/(1-ts.est) ) var_f <- ts.var*(2/(1-ts.est^2))^2 lower_f <- ts_f - abs(qnorm(0.5*(1-ci)))*sqrt(var_f) upper_f <- ts_f + abs(qnorm(0.5*(1-ci)))*sqrt(var_f) lower <- (exp(lower_f)-1)/(1+exp(lower_f)) upper <- (exp(upper_f)-1)/(1+exp(upper_f)) logit.ts <- ts.est logit.lb <- lower logit.ub <- upper logit.p <- ts.pval diag(logit.lb) <- diag(logit.ub) <- diag(logit.p) <- NA ###### using the cloglog link function Sys.time() presid <- dl.dtheta <- d2l.dtheta.dtheta <- dpresid.dtheta <- list() for (i in 1:dim(d2)[2]){ tryCatch({ d.tmp <- data.frame(d2[,i], adj.var) colnames(d.tmp)[1] <- colnames(d2)[i] d.tmp.c <- d.tmp[complete.cases(d.tmp), ] X=model.matrix(~ urban + rcs(age) + marital + religion + language, d.tmp.c) #mod <- orm(d2[index[,i],i]~ urban + age + marital + religion + language, data=d.tmp.c ) #X=model.matrix(~ urban, d.tmp.c) zzint <- match("(Intercept)", colnames(X), nomatch = 0L) if(zzint > 0L) { X <- X[, -zzint, drop = FALSE] } if(length(unique(d2[index[,i],i]))>2 ){ score <- orm.scores(d.tmp.c[,1], X, link="cloglog") dl.dtheta[[i]] <- score$dl.dtheta d2l.dtheta.dtheta[[i]] <- score$d2l.dtheta.dtheta presid[[i]] <- 
score$presid dpresid.dtheta[[i]] <- score$dpresid.dtheta } else if(length(unique(d2[index[,i],i]))==2){ score <- logit.scores(d.tmp.c[,1], X) dl.dtheta[[i]] <- score$dl.dtheta d2l.dtheta.dtheta[[i]] <- score$d2l.dtheta.dtheta presid[[i]] <- score$presid dpresid.dtheta[[i]] <- score$dpresid.dtheta } }, error=function(e) print(paste(i, colnames(d2)[i],":", e)) ) } Sys.time() ts.est <- ts.var <- ts.pval <- matrix(NA, nrow=dim(d2)[2], ncol=dim(d2)[2]) for(i in 1: (dim(d2)[2]-1)){ for (j in (i+1):dim(d2)[2]){ tryCatch({ result <- corTS.NA(ind.x=index[,i], ind.y=index[,j], presid.x=presid[[i]], presid.y=presid[[j]], dl.dtheta.x=dl.dtheta[[i]], dl.dtheta.y=dl.dtheta[[j]], d2l.dtheta.dtheta.x=d2l.dtheta.dtheta[[i]], d2l.dtheta.dtheta.y=d2l.dtheta.dtheta[[j]], dpresid.dtheta.x=dpresid.dtheta[[i]], dpresid.dtheta.y=dpresid.dtheta[[j]], fisher=TRUE) ts.est[i, j] <- result[1] ts.var[i, j] <- result[2] ts.pval[i, j] <- result[3] }, error=function(e) print(paste(i, "-", j, ":", e)) ) } print(paste(i, " :",Sys.time())) } diag(ts.est) <- rep(1, dim(ts.est)[2]) ts.est[lower.tri(ts.est)] <- t(ts.est)[lower.tri(ts.est)] diag(ts.var) <- rep(1, dim(ts.var)[2]) ts.var[lower.tri(ts.var)] <- t(ts.var)[lower.tri(ts.var)] diag(ts.pval) <- rep(1, dim(ts.pval)[2]) ts.pval[lower.tri(ts.pval)] <- t(ts.pval)[lower.tri(ts.pval)] ci=0.95 ts_f <- log( (1+ts.est)/(1-ts.est) ) var_f <- ts.var*(2/(1-ts.est^2))^2 lower_f <- ts_f - abs(qnorm(0.5*(1-ci)))*sqrt(var_f) upper_f <- ts_f + abs(qnorm(0.5*(1-ci)))*sqrt(var_f) lower <- (exp(lower_f)-1)/(1+exp(lower_f)) upper <- (exp(upper_f)-1)/(1+exp(upper_f)) cloglog.ts <- ts.est cloglog.lb <- lower cloglog.ub <- upper cloglog.p <- ts.pval diag(cloglog.lb) <- diag(cloglog.ub) <- diag(cloglog.p) <- NA spearman.ts <- matrix(NA, nrow=dim(d2)[2], ncol=dim(d2)[2]) for(i in 1: (dim(d2)[2]-1)){ for (j in (i+1):dim(d2)[2]){ d.tmp <- cbind(d2[, c(i, j)]) d.tmp <- d.tmp[complete.cases(d.tmp), ] spearman.ts[i, j] <- cor(d.tmp[,1], d.tmp[,2], method="spearman") } } 
ydiag(spearman.ts) <- rep(1, dim(spearman.ts)[2]) spearman.ts[lower.tri(spearman.ts)] <- t(spearman.ts)[lower.tri(spearman.ts)] spearman.ts <- ifelse(is.na(spearman.ts), 0, spearman.ts) save( logit.ts, logit.lb, logit.ub, logit.p, cloglog.ts, cloglog.lb, cloglog.ub, cloglog.p, spearman.ts, question, d3, sec.1.q, sec.2.q, sec.3.q, sec.4.q, sec.5.q, sec.6.q, sec.7.q, sec.8.q,sec.9.q, sec.10.q, sec.11.q, sec.12.q, sec.13.q,file="scip_plot.RData") ######### Figure 3 #### load("scip_plot.RData") library(shiny) library(ggplot2) library(reshape2) library(Hmisc) library(rms) library(stats) if(input$Correlation=="Spearman") { result <- spearman.ts } else if (input$Correlation=="corPSR"){ result <- logit.ts } else if (input$Correlation=="difference"){ result <- spearman.ts - logit.ts } x.index <- y.index <- 1:171 plot1 <- ggplot(melt(result[x.index, y.index]), aes(Var1, Var2, fill = value)) + geom_tile() + xlab("Q1") + ylab("Q2")+ scale_fill_gradient2(low = "blue", high = "red",limits=c(-1, 1)) + scale_x_continuous(breaks=cumsum(c(length(sec.10.q),length(sec.11.q),length(sec.12.q),length(sec.2.q),length(sec.6.q),length(sec.5.q),length(sec.8.q),length(sec.7.q),length(sec.9.q),length(sec.13.q),length(sec.4.q),length(sec.1.q),length(sec.3.q)))+0.5, labels=rep("", 13))+ scale_y_continuous(breaks=cumsum(c(length(sec.10.q),length(sec.11.q),length(sec.12.q),length(sec.2.q),length(sec.6.q),length(sec.5.q),length(sec.8.q),length(sec.7.q),length(sec.9.q),length(sec.13.q),length(sec.4.q),length(sec.1.q),length(sec.3.q)))+0.5, labels=rep("", 13)) plot1 <- plot1 + theme(plot.margin=unit(c(0,0,0,0),"mm"), axis.ticks = element_blank()) plot1 ############################################################ ######### Biomarker Analysis ############################################################ rm(list=ls()) setwd("~/Documents/datasets/hiv-biomarkers") d1<-read.csv("LiNC_&_CCC_cytokines_cohort_19March12.csv") d1<-d1[d1$study_id!="",] d2<-read.csv('AIACK23Study_DATA_2016-01-12_1340.csv') 
d2<-d2[!is.na(d2$hiv_suppr),] ## taking only those who are HIV positive (all of whom happen to have suppressed VL) sum(as.numeric(as.character(d1$hiv_viral_load))<=1000,na.rm=TRUE) sum(as.numeric(as.character(d1$hiv_viral_load))<=500,na.rm=TRUE) sum(as.numeric(as.character(d1$hiv_viral_load))<=400,na.rm=TRUE) sum(as.numeric(as.character(d1$hiv_viral_load))<=200,na.rm=TRUE) sum(as.numeric(as.character(d1$hiv_viral_load))<=100,na.rm=TRUE) sum(as.numeric(as.character(d1$hiv_viral_load))<=50,na.rm=TRUE) ##### Limiting the LiNC dataset to those with an undetectable viral load (VL<=400) d1$VL<-as.numeric(as.character(d1$hiv_viral_load)) d1<-d1[d1$VL<=400,] d1<-d1[!is.na(d1$VL),] summary(d1$age) summary(d2$age) table(d1$sex) table(d2$sex) # male=0, female=1 d1$male<-ifelse(d1$sex=="Male",1,0) d2$male<-ifelse(d2$sex==0,1,0) table(d1$male) table(d2$male) table(d1$race) table(d2$race) ## "caucasian"=0,"african american"=1,"hispanic"=2,"asian"=3,"other"=4 d1$nonwhite<-ifelse(d1$race=="non-white",1,0) d2$nonwhite<-ifelse(d2$race==0,0,1) table(d1$nonwhite) table(d2$nonwhite) table(d1$smoker) table(d2$smoker) ### 1 yes, 0 no d1$smoker<-ifelse(d1$smoker=="Yes",1,0) table(d1$routine_asa_use) table(d2$asa_daily) d1$asa_use<-ifelse(d1$routine_asa_use=="Yes",1,0) d2$asa_use<-d2$asa_daily table(d1$nsaid_use) table(d2$nsaid_use) d1$nsaid_use<-ifelse(d1$nsaid_use=="Yes",1,0) ######## summary(d1$bmi) summary(d2$calc_bmi) d2$bmi<-d2$calc_bmi summary(d1$cd4_count) summary(d2$cd4_enroll) d1$cd4<-d1$cd4_count d2$cd4<-d2$cd4_enroll summary(d1$hscrp) summary(d2$aiac_crc_hscrp) d2$hscrp<-d2$aiac_crc_hscrp ##### Appear to be very different scales summary(d1$leptin) summary(d2$aiac_leptin) summary(d1$leptin/62.5) #### The recommended conversion factor, although they still don't seem consistent d1$leptin.new<-d1$leptin/62.5 d2$leptin.new<-d2$aiac_leptin ##### Very different scales summary(d1$cd14) summary(d2$scd14) summary(d2$scd14/1000000) #### The recommended conversion factor; now seems OK 
## Convert sCD14 to the LiNC scale, inspect remaining markers, stack the two
## cohorts, run naive correlations, then define the scoring functions and the
## covariate-adjusted biomarker correlation analysis.
d2$cd14<-d2$scd14/1000000
summary(d1$il_1_beta) # a lot more censoring
summary(d2$il_1_beta)
summary(d1$il_6)
summary(d2$il_6)
########## The following are probably not usable based on John's email:
##### Much more censoring in d1
summary(d1$tnf_alpha)
summary(d2$tnf_alpha)
##### Very different scales
summary(d1$mcp_1a)
summary(d2$mcp1)
##### Very different scales
summary(d1$mip_1a)
summary(d2$mip_1a)
##### different scales
summary(d1$tnf_a_receptor_1)
summary(d2$tnf_a_r1)
##### different scales
summary(d1$tnf_a_receptor_2)
summary(d2$tnf_a_r2)
##### Preparing to stack and stacking datasets
d1$id<-as.character(d1$study_id)
d1$study.linc<-1
d1a<-d1[,c("id","study.linc","male","age","nonwhite","smoker","bmi","cd4", "hscrp","leptin.new","cd14","il_1_beta","il_6")]
d2$id<-as.character(d2$aiac_pid)
d2$study.linc<-0
d2a<-d2[,c("id","study.linc","male","age","nonwhite","smoker","bmi","cd4", "hscrp","leptin.new","cd14","il_1_beta","il_6")]
d<-rbind(d1a,d2a)
d<-d[!is.na(d$leptin.new)&!is.na(d$cd14),]
d$leptin<-d$leptin.new
d$leptin.new<-NULL
### Pearson's correlations
with(d, cor.test(hscrp,leptin))
with(d, cor.test(hscrp,cd14))
with(d, cor.test(hscrp,il_1_beta))
with(d, cor.test(hscrp,il_6))
with(d, cor.test(leptin,cd14))
with(d, cor.test(leptin,il_1_beta))
with(d, cor.test(leptin,il_6))
with(d, cor.test(cd14,il_1_beta))
with(d, cor.test(cd14,il_6))
with(d, cor.test(il_1_beta,il_6))
### Spearman's correlations
with(d, cor.test(hscrp,leptin,method="spearman"))
with(d, cor.test(hscrp,cd14,method="spearman"))
with(d, cor.test(hscrp,il_1_beta,method="spearman"))
with(d, cor.test(hscrp,il_6,method="spearman"))
with(d, cor.test(leptin,cd14,method="spearman"))
with(d, cor.test(leptin,il_1_beta,method="spearman"))
with(d, cor.test(leptin,il_6,method="spearman"))
with(d, cor.test(cd14,il_1_beta,method="spearman"))
with(d, cor.test(cd14,il_6,method="spearman"))
with(d, cor.test(il_1_beta,il_6,method="spearman"))
####### Functions to be used
## diagn: diag() that never misreads a length-1 numeric as a dimension.
diagn <- function(x=1, nrow=length(x), ncol=nrow) if(is.matrix(x)) diag(x=x) else diag(x=x, nrow=nrow, ncol=ncol)

## orm.scores: fit orm() with the requested link and return per-subject
## scores, the (negated) information matrix, probability-scale residuals,
## and their derivatives w.r.t. theta = (alpha, beta).
orm.scores <- function (y, X, link=c("logistic", "probit", "cauchit", "loglog", "cloglog")) {
  y <- as.numeric(factor(y))
  if(!is.matrix(X)) X = matrix(X, ncol=1)
  N = length(y)
  ny = length(table(y))
  na = ny - 1
  nb = ncol(X)
  npar = na + nb
  Z = outer(y, 1:ny, "<=")
  mod <- orm(y~X, family=link[1], x=TRUE, y=TRUE)
  alpha = mod$coeff[1:na]
  beta = mod$coeff[-(1:na)]
  dl.dtheta = matrix(0, N, npar)
  Gamma = matrix(0,N,na)
  dlow.dtheta = dhi.dtheta =dpij.dtheta= matrix(, npar, N)
  for (i in 1:N) {
    z = Z[i,] ## z has length ny
    x = X[i,]
    ## gamma[j] = P(Y > j | x) under the fitted link.
    gamma <-1- mod$trans$cumprob(alpha + sum(beta*x))
    diffgamma = diff(c(gamma,1))
    diffgamma <- ifelse(diffgamma<1e-16, 1e-16, diffgamma)
    invgamma = 1/gamma
    invgamma <- ifelse(invgamma==Inf, 1e16, invgamma)
    invgamma2 = invgamma^2
    invdiffgamma = 1/diffgamma
    invdiffgamma2 = invdiffgamma^2
    phi = log(gamma / diffgamma) ## phi has length na
    Gamma[i,] = gamma
    dg.dphi = 1 - 1/(1 + exp(phi))
    ## l is the log likelihood (6.3) in McCullagh (1980)
    dl.dphi = z[-ny] - z[-1] * dg.dphi
    t.dl.dphi = t(dl.dphi)
    ## dphi.dgamma is a na*na matrix with rows indexed by phi
    ## and columns indexed by gamma
    dphi.dgamma = matrix(0,na,na)
    diag(dphi.dgamma) = invgamma + invdiffgamma
    if(na > 1) dphi.dgamma[cbind(1:(na-1), 2:na)] = -invdiffgamma[-na]
    ## Link derivative, floored away from zero for numerical stability.
    tmp <- mod$trans$deriv(x=alpha + sum(beta*x), f=mod$trans$cumprob(alpha + sum(beta*x)))
    tmp <- ifelse(tmp<1e-16, 1e-16, tmp)
    dgamma.base <- - tmp
    #dgamma.base <- - mod$trans$deriv(x=alpha + sum(beta*x), f=mod$trans$cumprob(alpha + sum(beta*x)))
    dgamma.dalpha = diagn(dgamma.base)
    dgamma.dbeta = dgamma.base %o% x
    #dgamma.dtheta[i,,] = cbind(dgamma.dalpha, dgamma.dbeta)
    #### First derivatives of log-likelihood (score functions)
    dl.dalpha = dl.dphi %*% dphi.dgamma %*% dgamma.dalpha
    dl.dbeta = dl.dphi %*% dphi.dgamma %*% dgamma.dbeta
    dl.dtheta[i,] = c(dl.dalpha, dl.dbeta)
    if (y[i] == 1) {
      dlow.dtheta[,i] <- 0
    } else {
      dlow.dtheta[,i] <- c(dgamma.dalpha[y[i]-1,], dgamma.dbeta[y[i]-1, ])
    }
    if (y[i] == ny) {
      dhi.dtheta[,i] <- 0
    } else {
      dhi.dtheta[,i] <- -c(dgamma.dalpha[y[i],], dgamma.dbeta[y[i],])
    }
  }
  ## Probability-scale residual: P(Y < y_i) - P(Y > y_i).
  low = cbind(0, Gamma)[cbind(1:N, y)]
  hi = cbind(1-Gamma, 0)[cbind(1:N, y)]
  presid <- low - hi
  dpresid.dtheta <- dlow.dtheta - dhi.dtheta
  result <- list(dl.dtheta=dl.dtheta,
                 d2l.dtheta.dtheta = -mod$info.matrix,
                 presid=presid,
                 dpresid.dtheta = dpresid.dtheta )
  return(result)
}

## logit.scores: same outputs as orm.scores() but for the logistic link via
## lrm(), with hand-computed second derivatives per McCullagh (1980).
logit.scores = function(y, X) {
  y <- as.numeric(factor(y))
  if(!is.matrix(X)) X = matrix(X, ncol=1)
  ## N: number of subjects; ny: number of y categories
  N = length(y)
  ny = length(table(y))
  ## na, nb: number of parameters in alpha and beta
  na = ny - 1
  nb = ncol(X)
  npar = na + nb
  ## Z is defined as in McCullagh (1980) JRSSB 42:109-142
  Z = outer(y, 1:ny, "<=")
  ## Fit proportional odds model and obtain the MLEs of parameters.
  mod = lrm(y ~ X, tol=1e-50, maxit=100)
  alpha = -mod$coeff[1:na]
  beta = -mod$coeff[-(1:na)]
  ## Scores are stored for individuals.
  dl.dtheta = matrix(NA, N, npar)
  ## Information matrices are stored as sums over all individuals.
  d2l.dalpha.dalpha = matrix(0,na,na)
  d2l.dalpha.dbeta = matrix(0,na,nb)
  d2l.dbeta.dbeta = matrix(0,nb,nb)
  d2l.dbeta.dalpha = matrix(0,nb,na)
  ## Predicted probabilities p0 and dp0.dtheta are stored for individuals.
  p0 = matrix(,N,ny)
  dp0.dtheta = array(0,c(N,ny,npar))
  ## Cumulative probabilities
  Gamma = matrix(0,N,na)
  dgamma.dtheta = array(0,c(N,na,npar))
  for (i in 1:N) {
    z = Z[i,] ## z has length ny
    x = X[i,]
    ## gamma and phi are defined as in McCullagh (1980)
    gamma = 1 - 1/(1 + exp(alpha + sum(beta*x))) ## gamma has length na
    diffgamma = diff(c(gamma,1))
    diffgamma <- ifelse(diffgamma<1e-16, 1e-16, diffgamma)
    invgamma = 1/gamma
    ## FIX: clamp Inf BEFORE squaring (invgamma2 was previously computed
    ## from the unclamped vector and could be Inf); matches orm.scores().
    invgamma <- ifelse(invgamma==Inf, 1e16, invgamma)
    invgamma2 = invgamma^2
    invdiffgamma = 1/diffgamma
    invdiffgamma2 = invdiffgamma^2
    phi = log(gamma / diffgamma) ## phi has length na
    Gamma[i,] = gamma
    #### Some intermediate derivatives
    ## g(phi) = log(1+exp(phi))
    dg.dphi = 1 - 1/(1 + exp(phi))
    ## l is the log likelihood (6.3) in McCullagh (1980)
    dl.dphi = z[-ny] - z[-1] * dg.dphi
    t.dl.dphi = t(dl.dphi)
    ## dphi.dgamma is a na*na matrix with rows indexed by phi
    ## and columns indexed by gamma
    dphi.dgamma = matrix(0,na,na)
    diag(dphi.dgamma) = invgamma + invdiffgamma
    if(na > 1) dphi.dgamma[cbind(1:(na-1), 2:na)] = -invdiffgamma[-na]
    dgamma.base = gamma * (1-gamma)
    dgamma.base <- ifelse(dgamma.base<1e-16, 1e-16, dgamma.base)
    dgamma.dalpha = diagn(dgamma.base)
    dgamma.dbeta = dgamma.base %o% x
    dgamma.dtheta[i,,] = cbind(dgamma.dalpha, dgamma.dbeta)
    d2gamma.base = gamma * (1-gamma) * (1-2*gamma)
    ##
    d2l.dphi.dphi = diagn(-z[-1] * dg.dphi * (1-dg.dphi))
    d2l.dphi.dalpha = d2l.dphi.dphi %*% dphi.dgamma %*% dgamma.dalpha
    d2l.dphi.dbeta = d2l.dphi.dphi %*% dphi.dgamma %*% dgamma.dbeta
    ##
    d2phi.dgamma.dalpha = array(0,c(na,na,na))
    d2phi.dgamma.dalpha[cbind(1:na,1:na,1:na)] = (-invgamma2 + invdiffgamma2) * dgamma.base
    if(na > 1) {
      d2phi.dgamma.dalpha[cbind(1:(na-1),1:(na-1),2:na)] = -invdiffgamma2[-na] * dgamma.base[-1]
      d2phi.dgamma.dalpha[cbind(1:(na-1),2:na,1:(na-1))] = -invdiffgamma2[-na] * dgamma.base[-na]
      d2phi.dgamma.dalpha[cbind(1:(na-1),2:na,2:na)] = invdiffgamma2[-na] * dgamma.base[-1]
    }
    ##
    d2phi.dgamma.dbeta = array(0,c(na,na,nb))
    rowdiff = matrix(0,na,na)
    diag(rowdiff) = 1
    if(na > 1) rowdiff[cbind(1:(na-1),2:na)] = -1
    d2phi.dgamma.dbeta.comp1 = diagn(-invdiffgamma2) %*% rowdiff %*% dgamma.dbeta
    d2phi.dgamma.dbeta.comp2 = diagn(-invgamma2) %*% dgamma.dbeta - d2phi.dgamma.dbeta.comp1
    for(j in 1:na) {
      d2phi.dgamma.dbeta[j,j,] = d2phi.dgamma.dbeta.comp2[j,]
      if(j < na) d2phi.dgamma.dbeta[j,j+1,] = d2phi.dgamma.dbeta.comp1[j,]
    }
    ##
    d2gamma.dalpha.dbeta = array(0,c(na,na,nb))
    for(j in 1:na) d2gamma.dalpha.dbeta[j,j,] = d2gamma.base[j] %o% x
    ##
    d2gamma.dbeta.dbeta = d2gamma.base %o% x %o% x
    #### First derivatives of log-likelihood (score functions)
    dl.dalpha = dl.dphi %*% dphi.dgamma %*% dgamma.dalpha
    dl.dbeta = dl.dphi %*% dphi.dgamma %*% dgamma.dbeta
    dl.dtheta[i,] = c(dl.dalpha, dl.dbeta)
    #### Second derivatives of log-likelihood
    #### Since first derivative is a sum of terms each being a*b*c,
    #### second derivative is a sum of terms each being (a'*b*c+a*b'*c+a*b*c').
    #### d2l.dalpha.dalpha
    ## Obtain aprime.b.c
    ## Transpose first so that matrix multiplication is meaningful.
    ## Then transpose so that column is indexed by second alpha.
    aprime.b.c = t(crossprod(d2l.dphi.dalpha, dphi.dgamma %*% dgamma.dalpha))
    ## Obtain a.bprime.c
    ## run through the index of second alpha
    a.bprime.c = matrix(,na,na)
    for(j in 1:na) a.bprime.c[,j] = t.dl.dphi %*% d2phi.dgamma.dalpha[,,j] %*% dgamma.dalpha
    ## Obtain a.b.cprime
    ## cprime = d2gamma.dalpha.dalpha = 0 if indices of the two alphas differ.
    d2gamma.dalpha.dalpha = diagn(d2gamma.base)
    a.b.cprime = diagn(as.vector(dl.dphi %*% dphi.dgamma %*% d2gamma.dalpha.dalpha))
    ## summing over individuals
    d2l.dalpha.dalpha = aprime.b.c + a.bprime.c + a.b.cprime + d2l.dalpha.dalpha
    #### d2l.dalpha.dbeta
    aprime.b.c = t(crossprod(d2l.dphi.dbeta, dphi.dgamma %*% dgamma.dalpha))
    a.bprime.c = a.b.cprime = matrix(,na,nb)
    for(j in 1:nb) {
      a.bprime.c[,j] = t.dl.dphi %*% d2phi.dgamma.dbeta[,,j] %*% dgamma.dalpha
      a.b.cprime[,j] = t.dl.dphi %*% dphi.dgamma %*% d2gamma.dalpha.dbeta[,,j]
    }
    d2l.dalpha.dbeta = aprime.b.c + a.bprime.c + a.b.cprime + d2l.dalpha.dbeta
    #### d2l.dbeta.dbeta
    aprime.b.c = t(crossprod(d2l.dphi.dbeta, dphi.dgamma %*% dgamma.dbeta))
    a.bprime.c = a.b.cprime = matrix(,nb,nb)
    for(j in 1:nb) {
      a.bprime.c[,j] = t.dl.dphi %*% d2phi.dgamma.dbeta[,,j] %*% dgamma.dbeta
      a.b.cprime[,j] = t.dl.dphi %*% dphi.dgamma %*% d2gamma.dbeta.dbeta[,,j]
    }
    d2l.dbeta.dbeta = aprime.b.c + a.bprime.c + a.b.cprime + d2l.dbeta.dbeta
  }
  #### Final assembly
  d2l.dtheta.dtheta = rbind(
    cbind(d2l.dalpha.dalpha, d2l.dalpha.dbeta),
    cbind(t(d2l.dalpha.dbeta), d2l.dbeta.dbeta))
  npar.z = dim(dl.dtheta)[2]
  dlow.dtheta = dhi.dtheta = matrix(, npar.z, N)
  for(i in 1:N) {
    if (y[i] == 1) {
      dlow.dtheta[,i] <- 0
    } else {
      dlow.dtheta[,i] <- dgamma.dtheta[i,y[i]-1,]
    }
    if (y[i] == ny) {
      dhi.dtheta[,i] <- 0
    } else {
      dhi.dtheta[,i] <- -dgamma.dtheta[i,y[i],]
    }
  }
  low = cbind(0, Gamma)[cbind(1:N, y)]
  hi = cbind(1-Gamma, 0)[cbind(1:N, y)]
  presid <- low - hi
  dpresid.dtheta <- dlow.dtheta - dhi.dtheta
  result <- list(dl.dtheta=dl.dtheta,
                 d2l.dtheta.dtheta = -mod$info.matrix,
                 presid=presid,
                 dpresid.dtheta = dpresid.dtheta )
  return(result)
}

## corTS.NA: partial-correlation test statistic TS = cor(presid.x, presid.y)
## with an M-estimation (sandwich) variance, tolerating missing data.
## ind.x/ind.y flag non-missing X/Y; only rows observed for BOTH contribute.
corTS.NA <- function(ind.x, ind.y, presid.x, presid.y,
                     dl.dtheta.x, dl.dtheta.y,
                     d2l.dtheta.dtheta.x, d2l.dtheta.dtheta.y,
                     dpresid.dtheta.x, dpresid.dtheta.y, fisher=TRUE){
  ind <- ind.x*ind.y
  n <- length(ind)
  npar.xz = dim(dl.dtheta.x)[2]
  npar.yz = dim(dl.dtheta.y)[2]
  Ntheta = npar.xz + npar.yz + 6
  ## Expand per-model scores/residuals to length n, zero where unobserved.
  xz.dl.dtheta <- matrix(0, n, npar.xz)
  xz.dl.dtheta[which(ind.x==1), ] <- dl.dtheta.x
  yz.dl.dtheta <- matrix(0, n, npar.yz)
  yz.dl.dtheta[which(ind.y==1), ] <- dl.dtheta.y
  xz.d2l.dtheta.dtheta <- as.matrix(d2l.dtheta.dtheta.x)
  yz.d2l.dtheta.dtheta <- as.matrix(d2l.dtheta.dtheta.y)
  xresid <- rep(0, n)
  yresid <- rep(0, n)
  xresid[which(ind.x==1)] <- presid.x
  xresid[which(ind==0)] <- 0
  yresid[which(ind.y==1)] <- presid.y
  yresid[which(ind==0)] <- 0
  dxresid.dthetax <- matrix(0, npar.xz, n)
  dyresid.dthetay <- matrix(0, npar.yz, n)
  dxresid.dthetax[, which(ind.x==1)] <- dpresid.dtheta.x
  dxresid.dthetax[, which(ind==0)] <- 0
  dyresid.dthetay[, which(ind.y==1)] <- dpresid.dtheta.y
  ## FIX: zero columns where the PAIR is unobserved (ind==0), matching the
  ## x-side above; previously used ind.y==0.
  dyresid.dthetay[, which(ind==0)] <- 0
  TS = cor(xresid[which(ind==1)], yresid[which(ind==1)])
  xresid2 = xresid^2
  yresid2 = yresid^2
  xbyyresid <- xresid * yresid
  mean.xresid <- mean(xresid[which(ind==1)])
  mean.yresid <- mean(yresid[which(ind==1)])
  mean.xbyyresid = mean(xbyyresid[which(ind==1)])
  mean.xresid2 <- mean(xresid[which(ind==1)]^2)
  mean.yresid2 <- mean(yresid[which(ind==1)]^2)
  ## Stacked estimating equations evaluated at the estimates.
  bigphi <- cbind(xz.dl.dtheta, yz.dl.dtheta,
                  (mean.xresid - xresid)*ind,
                  (mean.yresid - yresid)*ind,
                  (mean.xbyyresid - xbyyresid)*ind,
                  (mean.xresid2 - xresid2)*ind,
                  (mean.yresid2 - yresid2)*ind, 0)
  ## A: derivative matrix of the stacked estimating equations.
  A = matrix(0,Ntheta,Ntheta)
  A[1:npar.xz, 1:npar.xz] <- xz.d2l.dtheta.dtheta
  A[npar.xz+(1:npar.yz), npar.xz+(1:npar.yz)] = yz.d2l.dtheta.dtheta
  A[Ntheta-6+(1:6), Ntheta-6+(1:6)] = diag(sum(ind), 6)
  bigpartial = rbind(c( dxresid.dthetax %*% ind, rep(0, npar.yz)),
                     c( rep(0, npar.xz), dyresid.dthetay %*% ind ),
                     c(dxresid.dthetax %*% yresid, dyresid.dthetay %*% xresid),
                     c(dxresid.dthetax %*% (2*xresid), rep(0, npar.yz)),
                     c(rep(0, npar.xz), dyresid.dthetay %*% (2*yresid)))
  A[Ntheta-6+(1:5), 1:(npar.xz+npar.yz)] = -bigpartial
  ## TS also equals numTS / sqrt(varprod) = numTS * revsvp
  numTS = mean.xbyyresid - mean.xresid * mean.yresid
  var.xresid = mean.xresid2 - mean.xresid^2
  var.yresid = mean.yresid2 - mean.yresid^2
  varprod = var.xresid * var.yresid
  revsvp = 1/sqrt(varprod)
  dTS.dvarprod = numTS * (-0.5) * revsvp^3
  smallpartial = sum(ind)*c(-mean.yresid * revsvp + dTS.dvarprod * (-2*mean.xresid*var.yresid),
                            -mean.xresid * revsvp + dTS.dvarprod * (-2*mean.yresid*var.xresid),
                            revsvp,
                            dTS.dvarprod * var.yresid,
                            dTS.dvarprod * var.xresid)
  A[Ntheta, Ntheta-6+(1:5)] = -smallpartial
  ## Sandwich variance of the stacked parameter estimates.
  SS = solve(A, t(bigphi))
  var.theta = tcrossprod (SS, SS)
  varTS = var.theta[Ntheta, Ntheta]
  pvalTS = 2 * pnorm( -abs(TS)/sqrt(varTS))
  if (fisher==TRUE){
    ####Fisher's transformation
    TS_f <- log( (1+TS)/(1-TS) )
    var.TS_f <- varTS*(2/(1-TS^2))^2
    pvalTS <- 2 * pnorm( -abs(TS_f)/sqrt(var.TS_f))
  }
  result <- c(TS, varTS, pvalTS)
  names(result) <- c("TS", "var.TS", "pval.TS")
  return(result)
}

#######
library(shiny)
library(ggplot2)
library(reshape2)
library(Hmisc)
library(rms)
library(stats)
library(PResiduals)
## Adjustment covariates and biomarker outcomes for the stacked cohort d.
adj.var <- d[, c("age", "male", "nonwhite", "bmi","smoker", "cd4","study.linc")]
biomarker <- d[, c("hscrp","leptin","cd14","il_1_beta","il_6")]
dd<-with(d, datadist(age,male,nonwhite,bmi,cd4,smoker))
options(datadist='dd')
d3 <- biomarker
## Unadjusted Spearman estimates/p-values (stored in the LOWER triangle).
spearman.est <- spearman.pval <- matrix(NA, nrow=dim(d3)[2], ncol=dim(d3)[2])
for(i in 1: (dim(d3)[2]-1)){
  for (j in (i+1):dim(d3)[2]){
    d.tmp <- cbind(d3[, c(i, j)])
    d.tmp <- d.tmp[complete.cases(d.tmp), ]
    result <- cor.test(d.tmp[,1], d.tmp[,2], method="spearman", exact=FALSE)
    spearman.est[j, i] <- result$estimate
    spearman.pval[j, i] <- result$p.value
  }
}
index <- matrix(NA, dim(d3)[1], dim(d3)[2])
for(i in 1:dim(index)[2]){
  index[,i] <- !is.na(d3[,i])
}
Sys.time()
presid <- dl.dtheta <- d2l.dtheta.dtheta <- dpresid.dtheta <- list()
for (i in 1:dim(d3)[2]){
  tryCatch({
    d.tmp <- data.frame(d3[,i], adj.var)
    colnames(d.tmp)[1] <- colnames(d3)[i]
    d.tmp.c <- d.tmp[complete.cases(d.tmp), ]
    X=model.matrix(~ age+male+nonwhite+bmi+cd4+smoker+study.linc, d.tmp.c)
    ## Drop the intercept column: the ordinal models supply their own alphas.
    zzint <- match("(Intercept)", colnames(X), nomatch = 0L)
    if(zzint > 0L) {
      X <- X[, -zzint, drop = FALSE]
    }
    if(length(unique(d3[index[,i],i]))>2 ){
      score <- orm.scores(d.tmp.c[,1], X)
      dl.dtheta[[i]] <- score$dl.dtheta
      d2l.dtheta.dtheta[[i]] <- score$d2l.dtheta.dtheta
      presid[[i]] <- score$presid
      dpresid.dtheta[[i]] <- score$dpresid.dtheta
    } else if(length(unique(d3[index[,i],i]))==2){
      score <- logit.scores(d.tmp.c[,1], X)
      dl.dtheta[[i]] <- score$dl.dtheta
      d2l.dtheta.dtheta[[i]] <- score$d2l.dtheta.dtheta
      presid[[i]] <- score$presid
      dpresid.dtheta[[i]] <- score$dpresid.dtheta
    }
  }, error=function(e) print(paste(i, colnames(d3)[i],":", e)) )
}
Sys.time()
ts.est <- ts.pval <- ts.var <- matrix(NA, nrow=dim(d3)[2], ncol=dim(d3)[2])
for(i in 1: (dim(d3)[2]-1)){
  for (j in (i+1):dim(d3)[2]){
    tryCatch({
      result <- corTS.NA(ind.x=index[,i], ind.y=index[,j],
                         presid.x=presid[[i]], presid.y=presid[[j]],
                         dl.dtheta.x=dl.dtheta[[i]], dl.dtheta.y=dl.dtheta[[j]],
                         d2l.dtheta.dtheta.x=d2l.dtheta.dtheta[[i]],
                         d2l.dtheta.dtheta.y=d2l.dtheta.dtheta[[j]],
                         dpresid.dtheta.x=dpresid.dtheta[[i]],
                         dpresid.dtheta.y=dpresid.dtheta[[j]], fisher=TRUE)
      ts.est[i, j] <- result[1]
      ts.var[i, j] <- result[2]
      ts.pval[i, j] <- result[3]
    }, error=function(e) print(paste(i, "-", j, ":", e)) )
  }
  print(paste(i, " :",Sys.time()))
}
diag(ts.est) <- rep(1, dim(ts.est)[2])
diag(ts.pval) <- rep(1, dim(ts.pval)[2])
ts.est[lower.tri(ts.est)] <- spearman.est[lower.tri(spearman.est)]
ts.pval[lower.tri(ts.pval)] <- spearman.pval[lower.tri(spearman.pval)]
####upper.tri: spearman; lower.tri: corPSR
ts.est <- t(ts.est)
ts.pval <- t(ts.pval)
## Build the heatmap data and the outline polygons marking p < 0.05 cells.
x.index <- y.index <- 1:5
dat2 <- melt(ts.est[x.index, y.index])
dat.pval <- melt(ts.pval[x.index, y.index])
tmp <- subset(dat.pval, value<0.05)
tmp$x <- tmp$Var1-0.5
tmp$y <- tmp$Var2-0.5
## FIX: seq_len() instead of 1:dim(tmp)[1], which yields c(1, 0) and fails
## when no cell is significant (zero-row tmp).
tmp$id <- seq_len(dim(tmp)[1])
tmp2 <- tmp
tmp2$y <- tmp$Var2+0.5
tmp3 <- tmp
tmp3$x <- tmp$Var1+0.5
tmp3$y <- tmp$Var2+0.5
tmp4 <- tmp
tmp4$x <- tmp$Var1+0.5
pval.plot <- rbind(tmp, tmp2, tmp3, tmp4)
pval.plot <- pval.plot[order(pval.plot$id),c("id", "x", "y")]
label(d3$hscrp)<- "High-sensitivity C-reactive protein (HSCRP)"
label(d3$leptin)<- "Leptin"
label(d3$cd14)<- "Soluble CD14"
## Finish the axis labels, draw the biomarker heatmap to EPS, and compare
## several Spearman CI constructions for pairs discussed in the text.
label(d3$il_1_beta)<- "Interleukin-1 beta"
label(d3$il_6)<- "Interleukin-6"
postscript("biomarker_heatmap_small.eps", horizontal = FALSE, onefile = FALSE, paper = "special", height = 8, width = 9.5)
plot1 <- ggplot(dat2, aes(Var1, Var2, fill = value)) + geom_tile() + xlab("") + ylab("")+ scale_fill_gradient2(low = "blue", high = "red",limits=c(-1, 1))
## Print the rounded correlation inside each tile.
plot1 <- plot1+geom_text(aes(fill = dat2$value, label = round(dat2$value, 2)), size=5)
plot1 <- plot1 + theme(plot.margin=unit(c(0,0,0,0),"mm"), axis.ticks = element_blank())
## Black outlines around cells with p < 0.05 (corners built in pval.plot).
plot1 <- plot1+ geom_polygon(aes(x=x, y=y, group=id),fill=NA, data=pval.plot, inherit.aes=FALSE, color = "black")
plot1 <- plot1+ scale_x_continuous(breaks=cumsum(rep(1, dim(d3)[2])), labels=c("hsCRP", "Leptin", "sCD14", "IL-1-B", "IL-6")) + theme(axis.text=element_text(size=14)) +#theme(axis.text.x = element_text(angle = 90, hjust = 1, size=14))+
  scale_y_continuous(breaks=cumsum(rep(1, dim(d3)[2])), labels=c("hsCRP", "Leptin", "sCD14", "IL-1-B", "IL-6")) #labels=label(d3))
plot1
dev.off()
### Confidence intervals for specific examples included in the text
library(mada)
#### fisher transformation to get better 95% CI
## getCI: Wald CI for a correlation-type estimate; when fisher=TRUE the
## interval is built on the z = log((1+r)/(1-r)) scale and back-transformed.
## ts: point estimate; var: its variance; ci: coverage level.
getCI <- function(ts, var, fisher, ci=0.95){
  if(!fisher){
    lower <- ts - abs(qnorm(0.5*(1-ci)))*sqrt(var)
    upper <- ts + abs(qnorm(0.5*(1-ci)))*sqrt(var)
  } else {
    ts_f <- log( (1+ts)/(1-ts) )
    var_f <- var*(2/(1-ts^2))^2
    lower_f <- ts_f - abs(qnorm(0.5*(1-ci)))*sqrt(var_f)
    upper_f <- ts_f + abs(qnorm(0.5*(1-ci)))*sqrt(var_f)
    lower <- (exp(lower_f)-1)/(1+exp(lower_f))
    upper <- (exp(upper_f)-1)/(1+exp(upper_f))
  }
  return(cbind(lower, upper))
}
### IL_6 and cd14
rho<-cor(d3$il_6, d3$cd14, method="spearman")
rho
CIrho(rho,length(d3$il_6)) ### using mada package
tanh(atanh(rho)+c(-1,1)*1.96/sqrt(length(d3$il_6)-3)) ### using formula for Pearson's correlation
tanh(atanh(rho)+c(-1,1)*qt(.975,df=length(d3$il_6)-1)*sqrt((1+rho^2/2)/(length(d3$il_6)-3))) ### using formula of Bonnett DG, Wright TA (2000). Sample size requirements for estimating Pearson, Kendall, and Spearman correlations. Psychometrika 65, 23-28.
tanh(atanh(rho)+c(-1,1)*qt(.975,df=length(d3$il_6)-1)*sqrt(1.06/(length(d3$il_6)-3))) ### using formula of Fieller EC, Hartley HO, Pearson ES (1957). Tests for rank correlation coefficients: I. Biometrika, 44, 470-481.
#### With a big sample size like this, these are all very similar.
#### Now the adjusted CI
## Adjusted (partial) CI uses the upper-triangle entry of ts.est/ts.var.
column.indexes<-c(which(colnames(d3)=="il_6"),which(colnames(d3)=="cd14"))
large.index<-max(column.indexes)
small.index<-min(column.indexes)
getCI(ts.est[small.index,large.index], ts.var[small.index,large.index], fisher=TRUE)
### leptin and cd14
rho<-cor(d3$leptin, d3$cd14, method="spearman")
rho
CIrho(rho,length(d3$leptin)) ### using mada package
tanh(atanh(rho)+c(-1,1)*1.96/sqrt(length(d3$leptin)-3)) ### using formula for Pearson's correlation
tanh(atanh(rho)+c(-1,1)*qt(.975,df=length(d3$leptin)-1)*sqrt((1+rho^2/2)/(length(d3$leptin)-3))) ### using formula of Bonnett DG, Wright TA (2000). Sample size requirements for estimating Pearson, Kendall, and Spearman correlations. Psychometrika 65, 23-28.
tanh(atanh(rho)+c(-1,1)*qt(.975,df=length(d3$leptin)-1)*sqrt(1.06/(length(d3$leptin)-3))) ### using formula of Fieller EC, Hartley HO, Pearson ES (1957). Tests for rank correlation coefficients: I. Biometrika, 44, 470-481.
#### With a big sample size like this, these are all very similar.
#### Now the adjusted CI
column.indexes <- c(which(colnames(d3) == "leptin"), which(colnames(d3) == "cd14"))
large.index <- max(column.indexes)
small.index <- min(column.indexes)
getCI(ts.est[small.index, large.index], ts.var[small.index, large.index], fisher = TRUE)

### IL_6 and hscrp
rho <- cor(d3$il_6, d3$hscrp, method = "spearman")
rho
CIrho(rho, length(d3$il_6)) ### using mada package
tanh(atanh(rho) + c(-1, 1) * 1.96 / sqrt(length(d3$il_6) - 3)) ### using formula for Pearson's correlation
tanh(atanh(rho) + c(-1, 1) * qt(.975, df = length(d3$il_6) - 1) *
       sqrt((1 + rho^2 / 2) / (length(d3$il_6) - 3))) ### using formula of Bonnett DG, Wright TA (2000).
tanh(atanh(rho) + c(-1, 1) * qt(.975, df = length(d3$il_6) - 1) *
       sqrt(1.06 / (length(d3$il_6) - 3))) ### using formula of Fieller EC, Hartley HO, Pearson ES (1957).
#### With a big sample size like this, these are all very similar.
#### Now the adjusted CI
column.indexes <- c(which(colnames(d3) == "il_6"), which(colnames(d3) == "hscrp"))
large.index <- max(column.indexes)
small.index <- min(column.indexes)
getCI(ts.est[small.index, large.index], ts.var[small.index, large.index], fisher = TRUE)

##############################
######## Now Conditional Associations
library(SparseM)
library(rms)
library(PResiduals)

################################################################################################
######################### FUNCTIONS ###########################################################
################################################################################################

## Score contributions and probability-scale residuals (PSRs) for an orm fit of y ~ X.
## y: ordered outcome (coerced to 1..ny); X: covariate matrix; link: cumulative link.
## Returns dl.dtheta (N x npar score matrix), d2l.dtheta.dtheta (minus the orm
## information matrix), presid (PSR vector), dpresid.dtheta (npar x N derivative
## of the PSRs with respect to the model parameters).
orm.scores <- function(y, X, link = c("logistic", "probit", "cauchit", "loglog", "cloglog")) {
  y <- as.numeric(factor(y))
  if (!is.matrix(X)) X = matrix(X, ncol = 1)
  N = length(y)
  ny = length(table(y))
  na = ny - 1    # number of intercepts (alpha)
  nb = ncol(X)   # number of slopes (beta)
  npar = na + nb
  Z = outer(y, 1:ny, "<=")
  mod <- orm(y ~ X, family = link[1], x = TRUE, y = TRUE)
  alpha = mod$coeff[1:na]
  beta = mod$coeff[-(1:na)]
  dl.dtheta = matrix(0, N, npar)
  Gamma = matrix(0, N, na)
  dlow.dtheta = dhi.dtheta = matrix(, npar, N)
  for (i in 1:N) {
    z = Z[i, ]  ## z has length ny
    x = X[i, ]
    ## gamma_j = P(Y <= j | x); clamp tiny cell probabilities to avoid 0/Inf
    gamma <- 1 - mod$trans$cumprob(alpha + sum(beta * x))
    diffgamma = diff(c(gamma, 1))
    diffgamma <- ifelse(diffgamma < 1e-16, 1e-16, diffgamma)
    invgamma = 1 / gamma
    invgamma <- ifelse(invgamma == Inf, 1e16, invgamma)
    invdiffgamma = 1 / diffgamma
    phi = log(gamma / diffgamma)  ## phi has length na
    Gamma[i, ] = gamma
    dg.dphi = 1 - 1 / (1 + exp(phi))
    ## l is the log likelihood (6.3) in McCullagh (1980)
    dl.dphi = z[-ny] - z[-1] * dg.dphi
    ## dphi.dgamma is an na*na matrix with rows indexed by phi
    ## and columns indexed by gamma (bidiagonal)
    dphi.dgamma = matrix(0, na, na)
    diag(dphi.dgamma) = invgamma + invdiffgamma
    if (na > 1) dphi.dgamma[cbind(1:(na - 1), 2:na)] = -invdiffgamma[-na]
    tmp <- mod$trans$deriv(x = alpha + sum(beta * x),
                           f = mod$trans$cumprob(alpha + sum(beta * x)))
    tmp <- ifelse(tmp < 1e-16, 1e-16, tmp)
    dgamma.base <- -tmp
    dgamma.dalpha = diagn(dgamma.base)
    dgamma.dbeta = dgamma.base %o% x
    dl.dalpha = dl.dphi %*% dphi.dgamma %*% dgamma.dalpha
    dl.dbeta = dl.dphi %*% dphi.dgamma %*% dgamma.dbeta
    dl.dtheta[i, ] = c(dl.dalpha, dl.dbeta)
    ## derivatives of P(Y < y_i) and P(Y > y_i) w.r.t. theta
    if (y[i] == 1) {
      dlow.dtheta[, i] <- 0
    } else {
      dlow.dtheta[, i] <- c(dgamma.dalpha[y[i] - 1, ], dgamma.dbeta[y[i] - 1, ])
    }
    if (y[i] == ny) {
      dhi.dtheta[, i] <- 0
    } else {
      dhi.dtheta[, i] <- -c(dgamma.dalpha[y[i], ], dgamma.dbeta[y[i], ])
    }
  }
  ## PSR = P(Y < y_i | x) - P(Y > y_i | x)
  low = cbind(0, Gamma)[cbind(1:N, y)]
  hi = cbind(1 - Gamma, 0)[cbind(1:N, y)]
  presid <- low - hi
  dpresid.dtheta <- dlow.dtheta - dhi.dtheta
  result <- list(dl.dtheta = dl.dtheta,
                 d2l.dtheta.dtheta = -mod$info.matrix,
                 presid = presid,
                 dpresid.dtheta = dpresid.dtheta)
  return(result)
}

## Sandwich (M-estimation) variance for the correlation TS = cor(xresid, yresid)
## of two residual vectors, accounting for the estimation of both working models.
## Returns list(TS, var.TS, pval.TS); fisher=TRUE computes the p-value on the
## Fisher-transformed scale.
corTS = function(xresid, yresid, xz.dl.dtheta, yz.dl.dtheta,
                 xz.d2l.dtheta.dtheta, yz.d2l.dtheta.dtheta,
                 dxresid.dthetax, dyresid.dthetay, fisher = FALSE) {
  TS = cor(xresid, yresid)
  xresid2 = xresid^2
  yresid2 = yresid^2
  xbyyresid = xresid * yresid
  mean.xresid = mean(xresid)
  mean.yresid = mean(yresid)
  mean.xbyyresid = mean(xbyyresid)
  ## stacked estimating equations: model scores + moment conditions + TS itself
  bigphi = cbind(xz.dl.dtheta, yz.dl.dtheta,
                 mean.xresid - xresid, mean.yresid - yresid,
                 mean.xbyyresid - xbyyresid,
                 mean(xresid2) - xresid2, mean(yresid2) - yresid2, 0)
  npar.xz = dim(xz.dl.dtheta)[2]
  npar.yz = dim(yz.dl.dtheta)[2]
  Ntheta = npar.xz + npar.yz + 6
  N = dim(xz.dl.dtheta)[1]
  A = matrix(0, Ntheta, Ntheta)
  A[1:npar.xz, 1:npar.xz] = xz.d2l.dtheta.dtheta
  A[npar.xz + (1:npar.yz), npar.xz + (1:npar.yz)] = yz.d2l.dtheta.dtheta
  A[Ntheta - 6 + (1:6), Ntheta - 6 + (1:6)] = diag(N, 6)
  ## derivatives of the moment conditions w.r.t. the working-model parameters
  bigpartial = rbind(c(dxresid.dthetax %*% rep(1, N), rep(0, npar.yz)),
                     c(rep(0, npar.xz), dyresid.dthetay %*% rep(1, N)),
                     c(dxresid.dthetax %*% yresid, dyresid.dthetay %*% xresid),
                     c(dxresid.dthetax %*% (2 * xresid), rep(0, npar.yz)),
                     c(rep(0, npar.xz), dyresid.dthetay %*% (2 * yresid)))
  A[Ntheta - 6 + (1:5), 1:(npar.xz + npar.yz)] = -bigpartial
  ## TS also equals numTS / sqrt(varprod) = numTS * revsvp
  numTS = mean.xbyyresid - mean.xresid * mean.yresid
  var.xresid = mean(xresid2) - mean.xresid^2
  var.yresid = mean(yresid2) - mean.yresid^2
  varprod = var.xresid * var.yresid
  revsvp = 1 / sqrt(varprod)
  dTS.dvarprod = numTS * (-0.5) * revsvp^3
  ## delta-method derivatives of TS w.r.t. the five moment parameters
  smallpartial = N * c(-mean.yresid * revsvp + dTS.dvarprod * (-2 * mean.xresid * var.yresid),
                       -mean.xresid * revsvp + dTS.dvarprod * (-2 * mean.yresid * var.xresid),
                       revsvp,
                       dTS.dvarprod * var.yresid,
                       dTS.dvarprod * var.xresid)
  ## NOTE: original code assigned this row twice; the duplicate was removed (idempotent).
  A[Ntheta, Ntheta - 6 + (1:5)] = -smallpartial
  SS = solve(A, t(bigphi))
  var.theta = tcrossprod(SS, SS)
  varTS = var.theta[Ntheta, Ntheta]
  pvalTS = 2 * pnorm(-abs(TS) / sqrt(varTS))
  if (fisher == TRUE) {
    #### Fisher's transformation
    TS_f <- log((1 + TS) / (1 - TS))
    var.TS_f <- varTS * (2 / (1 - TS^2))^2
    pvalTS <- 2 * pnorm(-abs(TS_f) / sqrt(var.TS_f))
  }
  list(TS = TS, var.TS = varTS, pval.TS = pvalTS)
}

## 95% (by default) CI for an estimate ts with variance var; optionally built on
## the Fisher-transformed scale. Returns c(lower, upper).
getCI <- function(ts, var, fisher, ci = 0.95) {
  if (!fisher) {
    lower <- ts - abs(qnorm(0.5 * (1 - ci))) * sqrt(var)
    upper <- ts + abs(qnorm(0.5 * (1 - ci))) * sqrt(var)
  } else {
    ts_f <- log((1 + ts) / (1 - ts))
    var_f <- var * (2 / (1 - ts^2))^2
    lower_f <- ts_f - abs(qnorm(0.5 * (1 - ci))) * sqrt(var_f)
    upper_f <- ts_f + abs(qnorm(0.5 * (1 - ci))) * sqrt(var_f)
    lower <- (exp(lower_f) - 1) / (1 + exp(lower_f))
    upper <- (exp(upper_f) - 1) / (1 + exp(upper_f))
  }
  return(c(lower, upper))
}

###### obtaining PSRs from two orm models
## formula has two left-hand sides (x | y ~ z); fits x ~ z and y ~ z with orm
## and correlates their PSRs via corTS.
cocobot.orm <- function(formula, data,
                        link.x = c("logistic", "probit", "cauchit", "loglog", "cloglog"),
                        link.y = c("logistic", "probit", "cauchit", "loglog", "cloglog"),
                        subset, na.action = getOption('na.action'),
                        fisher = FALSE, conf.int = 0.95) {
  # Construct the model frames for x ~ z and y ~ z
  F1 <- Formula(formula)
  Fx <- formula(F1, lhs = 1)
  Fy <- formula(F1, lhs = 2)
  mf <- match.call(expand.dots = FALSE)
  m <- match(c("formula", "data", "subset", "weights", "na.action", "offset"),
             names(mf), 0L)
  mf <- mf[c(1L, m)]
  mf$drop.unused.levels <- TRUE
  mf$na.action <- na.action
  # We set xlev to a benign non-value in the call so that it won't get partially matched
# to any variable in the formula. For instance a variable named 'x' could possibly get # bound to xlev, which is not what we want. mf$xlev <- integer(0) mf[[1L]] <- as.name("model.frame") mx <- my <- mf # NOTE: we add the opposite variable to each model frame call so that # subsetting occurs correctly. Later we strip them off. mx[["formula"]] <- Fx yName <- all.vars(Fy[[2]])[1] mx[[yName]] <- Fy[[2]] my[["formula"]] <- Fy xName <- all.vars(Fx[[2]])[1] my[[xName]] <- Fx[[2]] mx <- eval(mx, parent.frame()) mx[[paste('(',yName,')',sep='')]] <- NULL my <- eval(my, parent.frame()) my[[paste('(',xName,')',sep='')]] <- NULL data.points <- nrow(mx) # Construct the model matrix z mxz <- model.matrix(attr(mx,'terms'),mx) zzint <- match("(Intercept)", colnames(mxz), nomatch = 0L) if(zzint > 0L) { mxz <- mxz[, -zzint, drop = FALSE] } myz <- model.matrix(attr(my,'terms'),my) zzint <- match("(Intercept)", colnames(myz), nomatch = 0L) if(zzint > 0L) { myz <- myz[, -zzint, drop = FALSE] } ## return value ans <- list( TS=list(), fisher=fisher, conf.int=conf.int, data.points=data.points ) score.xz <- orm.scores(y=model.response(mx), X=mxz, link=link.x) score.yz <- orm.scores(y=model.response(my), X=myz, link=link.y) ts = corTS(score.xz$presid, score.yz$presid, score.xz$dl.dtheta, score.yz$dl.dtheta, as.matrix(score.xz$d2l.dtheta.dtheta), as.matrix(score.yz$d2l.dtheta.dtheta), score.xz$dpresid.dtheta, score.yz$dpresid.dtheta,fisher) ts.label = "PResid vs. 
PResid" ans$TS$TS <- list( ts=ts$TS, var=ts$var.TS, pval=ts$pval.TS, label = ts.label) ans <- structure(ans, class="cocobot") # Apply confidence intervals for (i in seq_len(length(ans$TS))){ ts_ci <- getCI(ans$TS[[i]]$ts,ans$TS[[i]]$var,ans$fisher,conf.int) ans$TS[[i]]$lower <- ts_ci[1] ans$TS[[i]]$upper <- ts_ci[2] } ans } lm.scores = function(y, X){ N = length(y) mod = lm(y~X) smod = summary(mod) resid = smod$residuals d2l.dtheta.dtheta = -crossprod(cbind(1, X)) dl.dtheta <- resid*cbind(1, X) presid = 2*pnorm((y - mod$fitted.values)/smod$sigma) -1 dresid.dtheta = t(cbind(-1, -X)) dpresid.dtheta = t(cbind(-2*dnorm((y - mod$fitted.values)/smod$sigma)/smod$sigma, -2*dnorm((y - mod$fitted.values)/smod$sigma)/smod$sigma * X)) f.y<-density(resid) fy.ry <- NULL presid.k <- NULL for (i in 1:length(resid)){ fy.ry[i] <- f.y$y[which(abs(f.y$x-resid[i])==min(abs(f.y$x-resid[i])))] presid.k[i] <- sum(residresid[i])/length(resid) } dpresid.dtheta.k <- t(cbind(-2*fy.ry, -2*fy.ry*X)) list(mod = mod, dl.dtheta = dl.dtheta, d2l.dtheta.dtheta = d2l.dtheta.dtheta, resid = resid, dresid.dtheta = dresid.dtheta, presid = presid, presid.k= presid.k, dpresid.dtheta = dpresid.dtheta, dpresid.dtheta.k = dpresid.dtheta.k) } continuous.bot <- function(formula, data, subset, na.action=getOption('na.action'), emp=TRUE,fisher=FALSE,conf.int=0.95) { F1 <- Formula(formula) Fx <- formula(F1, lhs=1) Fy <- formula(F1, lhs=2) mf <- match.call(expand.dots = FALSE) m <- match(c("formula", "data", "subset", "weights", "na.action", "offset"), names(mf), 0L) mf <- mf[c(1L, m)] mf$drop.unused.levels <- TRUE mf$na.action <- na.action mf$xlev <- integer(0) mf[[1L]] <- as.name("model.frame") mx <- my <- mf mx[["formula"]] <- Fx yName <- all.vars(Fy[[2]])[1] mx[[yName]] <- Fy[[2]] my[["formula"]] <- Fy xName <- all.vars(Fx[[2]])[1] my[[xName]] <- Fx[[2]] mx <- eval(mx, parent.frame()) mx[[paste('(',yName,')',sep='')]] <- NULL my <- eval(my, parent.frame()) my[[paste('(',xName,')',sep='')]] <- NULL data.points 
<- nrow(mx) mxz <- model.matrix(attr(mx,'terms'),mx) zzint <- match("(Intercept)", colnames(mxz), nomatch = 0L) if(zzint > 0L) { mxz <- mxz[, -zzint, drop = FALSE] } myz <- model.matrix(attr(my,'terms'),my) zzint <- match("(Intercept)", colnames(myz), nomatch = 0L) if(zzint > 0L) { myz <- myz[, -zzint, drop = FALSE] } score.xz <- lm.scores(y=model.response(mx), X=mxz) score.yz <- lm.scores(y=model.response(my), X=myz) npar.xz = dim(score.xz$dl.dtheta)[2] npar.yz = dim(score.yz$dl.dtheta)[2] ans <- list( TS=list(), fisher=fisher, conf.int=conf.int, data.points=data.points ) if (emp==TRUE){ ### presid vs presid (emprical) tb = corTS(score.xz$presid.k, score.yz$presid.k, score.xz$dl.dtheta, score.yz$dl.dtheta, score.xz$d2l.dtheta.dtheta, score.yz$d2l.dtheta.dtheta, score.xz$dpresid.dtheta.k, score.yz$dpresid.dtheta.k,fisher) tb.label = "PResid vs. PResid (empirical)" } else { ### presid vs presid (use pdf of normal) tb = corTS(score.xz$presid, score.yz$presid, score.xz$dl.dtheta, score.yz$dl.dtheta, score.xz$d2l.dtheta.dtheta, score.yz$d2l.dtheta.dtheta, score.xz$dpresid.dtheta, score.yz$dpresid.dtheta,fisher) tb.label = "PResid vs. PResid (assume normality)" } ans$TS$TB <- list(ts=tb$TS, var=tb$var.TS, pval=tb$pval.TS,label = tb.label) tc <- corTS(score.xz$resid, score.yz$resid, score.xz$dl.dtheta, score.yz$dl.dtheta, score.xz$d2l.dtheta.dtheta, score.yz$d2l.dtheta.dtheta, score.xz$dresid.dtheta, score.yz$dresid.dtheta,fisher) ans$TS$TC <- list( ts=tc$TS, var=tc$var.TS, pval=tc$pval.TS,label = 'Obs-Exp vs. 
Obs-Exp') ans <- structure(ans, class="cocobot") for (i in seq_len(length(ans$TS))){ ts_ci <- getCI(ans$TS[[i]]$ts,ans$TS[[i]]$var,ans$fisher,conf.int) ans$TS[[i]]$lower <- ts_ci[1] ans$TS[[i]]$upper <- ts_ci[2] } return(ans) } as.numeric.factor <- function(x) {seq_along(levels(x))[x]} presid.orm <- function(object){ k <- object$non.slopes L <- object$linear.predictors trans <- object$trans cumprob <- if(length(trans)) trans$cumprob else plogis if(length(L)==0) stop('you did not use linear.predictors=TRUE for the fit') if(length(Y <- object$y) == 0) stop("you did not specify y=TRUE in the fit") if(!is.factor(Y)) Y <- factor(Y) Y <- unclass(Y) - 1 cof <- object$coefficients if(length(X <- object$x)==0) stop("you did not use x=TRUE in the fit") N <- length(Y) px <- 1 - cumprob(outer(cof[1:k], as.vector(X %*% cof[- (1:k)]), "+")) low.x = rbind(0, px)[cbind(Y + 1L, 1:N)] hi.x = 1 - rbind(px, 1)[cbind(Y + 1L, 1:N)] return(low.x - hi.x) } #### fisher transformation to get better 95% CI getCI <- function(ts, var, fisher, ci=0.95){ if(!fisher){ lower <- ts - abs(qnorm(0.5*(1-ci)))*sqrt(var) upper <- ts + abs(qnorm(0.5*(1-ci)))*sqrt(var) } else { ts_f <- log( (1+ts)/(1-ts) ) var_f <- var*(2/(1-ts^2))^2 lower_f <- ts_f - abs(qnorm(0.5*(1-ci)))*sqrt(var_f) upper_f <- ts_f + abs(qnorm(0.5*(1-ci)))*sqrt(var_f) lower <- (exp(lower_f)-1)/(1+exp(lower_f)) upper <- (exp(upper_f)-1)/(1+exp(upper_f)) } return(cbind(lower, upper)) } ####### conditional covariate-adjusted Spearman's rank correlation ##### function for obtaining the standard error of conditional estimate using large sample approximation prodTS.new <- function(x.resid, y.resid, z, xres2.method=c("emp", "constant", "model"), yres2.method=c("emp", "constant", "model"), xz.dl.dtheta, yz.dl.dtheta, xz.d2l.dtheta.dtheta, yz.d2l.dtheta.dtheta, dxresid.dtheta, dyresid.dtheta){ npar.xz <- dim(xz.dl.dtheta)[2] npar.yz <- dim(yz.dl.dtheta)[2] score.prod <- lm.scores(x.resid*y.resid, z) npar.prod <- dim(score.prod$dl.dtheta)[2] 
prod.dl.dtheta <- score.prod$dl.dtheta prod.d2l.dtheta.dtheta <- score.prod$d2l.dtheta.dtheta N <- dim(score.prod$dl.dtheta)[1] if(xres2.method[1]=="model"){ score.xres2 <- lm.scores(x.resid^2, z) npar.xres2 <- dim(score.xres2$dl.dtheta)[2] xres2.dl.dtheta <- score.xres2$dl.dtheta xres2.d2l.dtheta.dtheta <- score.xres2$d2l.dtheta.dtheta } else if (xres2.method[1]=="emp"){ xres2.dl.dtheta <- as.matrix(x.resid^2-mean(x.resid^2) ) npar.xres2 <- 1 xres2.d2l.dtheta.dtheta <- -N } else if (xres2.method[1]=="constant"){ npar.xres2 <- 0 xres2.dl.dtheta <- NULL xres2.d2l.dtheta.dtheta <- NULL } if(yres2.method[1]=="model"){ score.yres2 <- lm.scores(y.resid^2, z) npar.yres2 <- dim(score.yres2$dl.dtheta)[2] yres2.dl.dtheta <- score.yres2$dl.dtheta yres2.d2l.dtheta.dtheta <- score.yres2$d2l.dtheta.dtheta } else if (yres2.method[1]=="emp"){ yres2.dl.dtheta <- as.matrix( y.resid^2-mean(y.resid^2) ) npar.yres2 <- 1 yres2.d2l.dtheta.dtheta <- -N } else if (yres2.method[1]=="constant"){ npar.yres2 <- 0 yres2.dl.dtheta <- NULL yres2.d2l.dtheta.dtheta <- NULL } Ntheta <- npar.xz + npar.yz + npar.prod + npar.xres2 + npar.yres2 bigphi <- cbind(xz.dl.dtheta, yz.dl.dtheta, prod.dl.dtheta, xres2.dl.dtheta, yres2.dl.dtheta) A <- matrix(0, Ntheta, Ntheta) A[(1:npar.xz), (1:npar.xz)] <- as.matrix(xz.d2l.dtheta.dtheta) A[npar.xz + (1:npar.yz), npar.xz + (1:npar.yz)] <- as.matrix(yz.d2l.dtheta.dtheta) A[npar.xz + npar.yz + (1: npar.prod), npar.xz + npar.yz + (1: npar.prod)] <- prod.d2l.dtheta.dtheta if (npar.xres2>0){ A[npar.xz + npar.yz + npar.prod+ (1: npar.xres2), npar.xz + npar.yz + npar.prod+ (1: npar.xres2)] <- xres2.d2l.dtheta.dtheta } if(npar.yres2>0){ A[npar.xz + npar.yz + npar.prod+ npar.xres2+(1: npar.yres2), npar.xz + npar.yz + npar.prod+ npar.xres2+(1: npar.yres2)] <- yres2.d2l.dtheta.dtheta } par.1 <- t(y.resid * cbind(1, z)) %*% t(dxresid.dtheta) par.2 <- t(x.resid * cbind(1, z)) %*% t(dyresid.dtheta) A[npar.xz + npar.yz + (1: npar.prod), 1:npar.xz] <- par.1 A[npar.xz + npar.yz 
+ (1: npar.prod), npar.xz + (1:npar.yz)] <- par.2 if (xres2.method[1]=="model"){ par.3 <- t(2*x.resid * cbind(1, z)) %*% t(dxresid.dtheta) } else if (xres2.method[1]=="emp"){ par.3 <- t(2*x.resid ) %*% t(dxresid.dtheta) } else if(xres2.method[1]=="constant"){ par.3 <- NULL } if(npar.xres2>0){ A[npar.xz + npar.yz + npar.prod+ (1: npar.xres2), 1:npar.xz] <- par.3 } if (yres2.method[1]=="model"){ par.4 <- t(2*y.resid * cbind(1, z)) %*% t(dyresid.dtheta) } else if(yres2.method[1]=="emp"){ par.4 <- t(2*y.resid ) %*% t(dyresid.dtheta) } else if(yres2.method[1]=="constant"){ par.4 <- NULL } if(npar.yres2>0){ A[npar.xz + npar.yz + npar.prod+ npar.xres2+(1: npar.yres2), npar.xz + (1:npar.yz)] <- par.4 } SS <- solve(A, t(bigphi)) var.theta <- tcrossprod(SS, SS) prod.coef <- score.prod$mod$coef names(prod.coef) <- paste("prod:", names(prod.coef)) if(xres2.method[1]=="model"){ xres2.coef <- score.xres2$mod$coef } else if (xres2.method[1]=="emp"){ xres2.coef <- mean(x.resid^2) } else if (xres2.method[1]=="constant"){ xres2.coef <- 1/3 } names(xres2.coef) <- paste("xres2:", names(xres2.coef)) if(yres2.method[1]=="model"){ yres2.coef <- score.yres2$mod$coef } else if (yres2.method[1]=="emp"){ yres2.coef <- mean(y.resid^2) } else if (yres2.method[1]=="constant"){ yres2.coef <- 1/3 } names(yres2.coef) <- paste("yres2:", names(yres2.coef)) #coef <- rbind(prod.coef, xres2.coef, yres2.coef) var.coef <- var.theta[(npar.xz+npar.yz+1): Ntheta , (npar.xz+npar.yz+1): Ntheta] #colnames(var.coef) <- rownames(var.coef) <- names(coef) #var.beta <- var.theta[(npar.xz+npar.yz+2):Ntheta, (npar.xz+npar.yz+2):Ntheta] #ts <- score.prod$mod$coef[-1] %*% solve(var.beta) %*% score.prod$mod$coef[-1] #p.val <- pchisq(ts, df=length(score.prod$mod$coef[-1]), lower.tail=FALSE) #result <- list(coef=score.prod$mod$coef, # var=var.theta[(npar.xz+npar.yz+1):Ntheta, (npar.xz+npar.yz+1):Ntheta], # TS=ts, # pval=p.val) result <- list(prod=prod.coef, xres2=xres2.coef, yres2=yres2.coef, var.coef=var.coef) 
return(result) } ##### kernel weighted conditional estimates for age library(gplm) library(boot) nw.kernel <- function(x, h, kernel){ ws <- sapply(x, FUN=function(i) kernel.function( (i-x)/h, kernel=kernel)/sum(kernel.function( (i-x)/h, kernel=kernel))) return(ws) } #### kernel weighted correlation between presiduals kernel.cor <- function(x, y, z, kernel="gaussian", h){ wt <- nw.kernel(z, h, kernel=kernel) weighted.cor <- apply(wt, 2, FUN=function(wt) corr(cbind(x, y), wt)) return(cbind(z, weighted.cor)) } #h=3 ########################## Looking at associations with leptin #### Bryan changed from as.factor(year) to year (continuous variable) 2017-01-07 to get it working cd14.m <- orm(cd14 ~ age+male+nonwhite+bmi+cd4+smoker+study.linc, data=d, x=TRUE, y=TRUE) leptin.m <- orm(leptin ~ age+male+nonwhite+bmi+cd4+smoker+study.linc, data=d, x=TRUE, y=TRUE) with(d, cor.test(cd14,bmi,method="spearman")) with(d, cor.test(leptin,bmi,method="spearman")) cd14.p <- presid.orm(cd14.m) leptin.p <- presid.orm(leptin.m) resid.prod <- cd14.p*leptin.p mxz <- model.matrix(as.formula("cd14 ~ age+male+nonwhite+bmi+cd4+smoker+study.linc"), data=d) mxz <- mxz[, -1] score.xz <- orm.scores(d$cd14, mxz) score.yz <- orm.scores(d$leptin, mxz) x.resid <- score.xz$presid y.resid <- score.yz$presid ##### Computing conditional partial correlations for several binary covariates: ##### z=1: LINC; 0: AIAC #z <- d$study.linc #z <- d$nonwhite #z <- d$smoker z <- d$male xz.dl.dtheta <- score.xz$dl.dtheta yz.dl.dtheta <- score.yz$dl.dtheta xz.d2l.dtheta.dtheta <- score.xz$d2l.dtheta.dtheta yz.d2l.dtheta.dtheta <- score.yz$d2l.dtheta.dtheta dxresid.dtheta <- score.xz$dpresid.dtheta dyresid.dtheta <- score.yz$dpresid.dtheta result <- prodTS.new(x.resid, y.resid, z, xres2.method="model", yres2.method="model", xz.dl.dtheta, yz.dl.dtheta, xz.d2l.dtheta.dtheta, yz.d2l.dtheta.dtheta, dxresid.dtheta, dyresid.dtheta) new.z <- cbind(c(1, 1), c(0,1)) est.prod <- new.z %*% result$prod est.xres2 <- new.z %*% 
result$xres2 est.yres2 <- new.z%*% result$yres2 ##### point estimates are very close to the bootstrap result est.gamma <- est.prod/ sqrt(est.xres2 * est.yres2) dgamma.dprod <- apply(new.z, 2, FUN=function(x) 1/sqrt(est.xres2*est.yres2)*x) dgamma.dxres2<- apply(new.z, 2, FUN=function(x) - est.gamma/2/est.xres2 *x) dgamma.dyres2 <- apply(new.z, 2, FUN=function(x) - est.gamma/2/est.yres2 *x) dgamma.dcoef <- cbind(dgamma.dprod, dgamma.dxres2, dgamma.dyres2) cov.gamma <- dgamma.dcoef %*% result$var.coef %*% t(dgamma.dcoef) var.gamma <-diag(dgamma.dcoef %*% result$var.coef %*% t(dgamma.dcoef)) result.ci <- getCI(est.gamma, var.gamma, fisher=TRUE, ci=0.95) ########### testing for difference cov.gamma <- dgamma.dcoef %*% result$var.coef %*% t(dgamma.dcoef) var.diff <- cov.gamma[1, 1] + cov.gamma[2,2] -2*cov.gamma[1,2] est.diff <- diff(est.gamma) p.val <- 2* pnorm(abs(est.diff/sqrt(var.diff)), lower.tail=FALSE) est.gamma result.ci p.val ####### conditional on bmi ##### z <- model.matrix(as.formula("cd14 ~ rcs(bmi,3) "), data=d) z <- z[,-1] xz.dl.dtheta <- score.xz$dl.dtheta yz.dl.dtheta <- score.yz$dl.dtheta xz.d2l.dtheta.dtheta <- score.xz$d2l.dtheta.dtheta yz.d2l.dtheta.dtheta <- score.yz$d2l.dtheta.dtheta dxresid.dtheta <- score.xz$dpresid.dtheta dyresid.dtheta <- score.yz$dpresid.dtheta prod <- x.resid*y.resid xres2 <- x.resid^2 yres2 <- y.resid^2 data <- data.frame(bmi=d$bmi, prod=prod, xres2=xres2, yres2=yres2) mod.prod <- ols(prod~ rcs(bmi, 3), data) mod.xres2 <- ols(xres2 ~ rcs(bmi, 3), data) mod.yres2 <- ols(yres2 ~ rcs(bmi, 3), data) result <- prodTS.new(x.resid, y.resid, z, xres2.method="model", yres2.method="model", xz.dl.dtheta, yz.dl.dtheta, xz.d2l.dtheta.dtheta, yz.d2l.dtheta.dtheta, dxresid.dtheta, dyresid.dtheta) new.bmi <- c(18:57) new.z <- cbind(1, rcspline.eval(new.bmi, knots=mod.prod$Design$parms$bmi, inclx=TRUE)) est.prod <- new.z %*% result$prod est.xres2 <- new.z %*% result$xres2 est.yres2 <- new.z%*% result$yres2 ##### point estimates are very close 
to the bootstrap result est.gamma <- est.prod/ sqrt(est.xres2 * est.yres2) ##### standard error : delta method dgamma.dprod <- apply(new.z, 2, FUN=function(x) 1/sqrt(est.xres2*est.yres2)*x) dgamma.dxres2<- apply(new.z, 2, FUN=function(x) - est.gamma/2/est.xres2 *x) dgamma.dyres2 <- apply(new.z, 2, FUN=function(x) - est.gamma/2/est.yres2 *x) dgamma.dcoef <- cbind(dgamma.dprod, dgamma.dxres2, dgamma.dyres2) cov.gamma <- dgamma.dcoef %*% result$var.coef %*% t(dgamma.dcoef) var.gamma <-diag(dgamma.dcoef %*% result$var.coef %*% t(dgamma.dcoef)) result.ci <- getCI(est.gamma, var.gamma, fisher=TRUE, ci=0.95) ##### silverman's rule of thumb for Gaussian kernel : h=1.06* sigma (age)* n^(-1/5) h=1.06*sd(d$bmi)*dim(d)[1]^(-1/5) #h=4 result.kernel <- kernel.cor(cd14.p, leptin.p, d$bmi, kernel="gaussian", h=h) result.kernel <- result.kernel[!duplicated(result.kernel), ] result.kernel <- result.kernel[order(result.kernel[,1]), ] postscript("leptin-cd14.eps",height=5, width=10) par(mar=c(4,2,1.5,3), mfrow=c(1,2)) plot(new.bmi, est.gamma, cex=0.1, type="n", ylim=c(-1, 1), xlab="BMI",ylab="Partial Rank Correlation") polygon(c(new.bmi, rev(new.bmi)), c(result.ci[,1], rev(result.ci[,2])), col = 'grey', border = NA) points(new.bmi, est.gamma, cex=0.1, type="l") points(new.bmi,result.ci[,1], cex=0.01, col="grey") points(new.bmi,result.ci[,2], cex=0.01, col="grey") lines(result.kernel, cex=0.1, col=1, lty=2) abline(h=cor(cd14.p, leptin.p), col=1, lty=3) legend("topleft", col=c(1,1,1 ), cex=0.7, legend=c("conditional (parametric)","conditional (kernel)", "partial"), bty="n", lty=c(1,2, 3)) ################################## Since x and y are both continuous (leptin and sCD14), I can simplify things by assuming Var(Xres)=Var(Yres)=1/3. ################################## This allows me to more easilty conmpute a p-value for the test that the rank correlation varies as a function of BMI. 
####### stratified by bmi #####
result <- prodTS.new(x.resid, y.resid, z,
                     xres2.method = "constant", yres2.method = "constant",
                     xz.dl.dtheta, yz.dl.dtheta,
                     xz.d2l.dtheta.dtheta, yz.d2l.dtheta.dtheta,
                     dxresid.dtheta, dyresid.dtheta)
est.prod <- new.z %*% result$prod
est.xres2 <- 1/3
est.yres2 <- 1/3
##### point estimates are very close to the bootstrap result
est.gamma <- est.prod / sqrt(est.xres2 * est.yres2)

# I get the same doing it hard-coded (written first), or using Qi's delta method formulation:
# var.est <- 9*(new.z[,1]^2*result$var.coef[1,1] + new.z[,2]^2*result$var.coef[2,2] +
#               new.z[,3]^2*result$var.coef[3,3] +
#               2*new.z[,1]*new.z[,2]*result$var.coef[1,2] +
#               2*new.z[,1]*new.z[,3]*result$var.coef[1,3] +
#               2*new.z[,2]*new.z[,3]*result$var.coef[2,3])
# result.ci <- getCI(est.gamma, var.est, fisher=TRUE, ci=0.95)
#
# ##### standard error : delta method
# dgamma.dprod <- apply(new.z, 2, FUN=function(x) 1/sqrt(est.xres2*est.yres2)*x)
# dgamma.dcoef <- cbind(dgamma.dprod)
# cov.gamma <- dgamma.dcoef %*% result$var.coef %*% t(dgamma.dcoef)
# var.gamma <- diag(dgamma.dcoef %*% result$var.coef %*% t(dgamma.dcoef))
# result.ci <- getCI(est.gamma, var.gamma, fisher=TRUE, ci=0.95)
# plot(new.bmi, est.gamma, cex=0.1, type="n", ylim=c(-1, 1),
#      xlab="BMI", ylab="Partial Rank Correlation")
# polygon(c(new.bmi, rev(new.bmi)), c(result.ci[,1], rev(result.ci[,2])), col='grey', border=NA)
# points(new.bmi, est.gamma, cex=0.1, type="l")
# points(new.bmi, result.ci[,1], cex=0.01, col="grey")
# points(new.bmi, result.ci[,2], cex=0.01, col="grey")
# lines(result.kernel, cex=0.1, col=1, lty=2)
# abline(h=cor(cd14.p, leptin.p), col=1, lty=3)
# legend("topleft", col=c(1,1,1), cex=0.7,
#        legend=c("conditional (parametric)","conditional (kernel)", "partial"),
#        bty="n", lty=c(1,2,3))

#### Wald test that partial rank correlation between leptin and sCD14 varies as a function of BMI
R <- matrix(c(1, 0, 0, 1), ncol = 2)
V <- result$var.coef[-1, -1]
1 - pchisq(t(R %*% mod.prod$coeff[-1]) %*% solve(R %*% V %*% R) %*% R %*% mod.prod$coeff[-1],
           df = 2)
anova(mod.prod)
##### Naive likelihood ratio test that doesn't account for uncertainty in PSRs
##### produces similar estimate to Wald test.
mod.prod.linear <- ols(prod ~ bmi, data)
anova(mod.prod.linear)

####### stratified by age #####
x.resid <- score.xz$presid
y.resid <- score.yz$presid
z <- model.matrix(as.formula("cd14 ~ rcs(age,3) "), data = d)
z <- z[, -1]
xz.dl.dtheta <- score.xz$dl.dtheta
yz.dl.dtheta <- score.yz$dl.dtheta
xz.d2l.dtheta.dtheta <- score.xz$d2l.dtheta.dtheta
yz.d2l.dtheta.dtheta <- score.yz$d2l.dtheta.dtheta
dxresid.dtheta <- score.xz$dpresid.dtheta
dyresid.dtheta <- score.yz$dpresid.dtheta
prod <- x.resid * y.resid
xres2 <- x.resid^2
yres2 <- y.resid^2
data <- data.frame(age = d$age, prod = prod, xres2 = xres2, yres2 = yres2)
mod.prod <- ols(prod ~ rcs(age, 3), data)
mod.xres2 <- ols(xres2 ~ rcs(age, 3), data)
mod.yres2 <- ols(yres2 ~ rcs(age, 3), data)
result <- prodTS.new(x.resid, y.resid, z,
                     xres2.method = "model", yres2.method = "model",
                     xz.dl.dtheta, yz.dl.dtheta,
                     xz.d2l.dtheta.dtheta, yz.d2l.dtheta.dtheta,
                     dxresid.dtheta, dyresid.dtheta)

new.age <- (250:680) / 10
new.z <- cbind(1, rcspline.eval(new.age, knots = mod.prod$Design$parms$age, inclx = TRUE))
est.prod <- new.z %*% result$prod
est.xres2 <- new.z %*% result$xres2
est.yres2 <- new.z %*% result$yres2
##### point estimates are very close to the bootstrap result
est.gamma <- est.prod / sqrt(est.xres2 * est.yres2)

dgamma.dprod <- apply(new.z, 2, FUN = function(x) 1 / sqrt(est.xres2 * est.yres2) * x)
dgamma.dxres2 <- apply(new.z, 2, FUN = function(x) -est.gamma / 2 / est.xres2 * x)
dgamma.dyres2 <- apply(new.z, 2, FUN = function(x) -est.gamma / 2 / est.yres2 * x)
dgamma.dcoef <- cbind(dgamma.dprod, dgamma.dxres2, dgamma.dyres2)
cov.gamma <- dgamma.dcoef %*% result$var.coef %*% t(dgamma.dcoef)
var.gamma <- diag(dgamma.dcoef %*% result$var.coef %*% t(dgamma.dcoef))
result.ci <- getCI(est.gamma, var.gamma, fisher = TRUE, ci = 0.95)

##### silverman's rule of thumb for Gaussian kernel : h = 1.06 * sigma(age) * n^(-1/5)
h = 1.06 * sd(d$age) * dim(d)[1]^(-1/5)
#h=5
result.kernel <- kernel.cor(cd14.p, leptin.p, d$age, kernel = "gaussian", h = h)
result.kernel <- result.kernel[!duplicated(result.kernel), ]
result.kernel <- result.kernel[order(result.kernel[, 1]), ]

#par(mar=c(4,2,1.5,3), mfrow=c(1,1))
plot(new.age, est.gamma, cex = 0.1, type = "n", ylim = c(-1, 1),
     xlab = "Age", ylab = "Partial Rank Correlation")
polygon(c(new.age, rev(new.age)), c(result.ci[, 1], rev(result.ci[, 2])),
        col = 'grey', border = NA)
points(new.age, est.gamma, cex = 0.1, type = "l")
points(new.age, result.ci[, 1], cex = 0.01, col = "grey")
points(new.age, result.ci[, 2], cex = 0.01, col = "grey")
lines(result.kernel, cex = 0.1, col = 1, lty = 2)
abline(h = cor(cd14.p, leptin.p), col = 1, lty = 3)
legend("topleft", col = c(1, 1, 1), cex = 0.7,
       legend = c("conditional (parametric)", "conditional (kernel)", "partial"),
       bty = "n", lty = c(1, 2, 3))
dev.off()

################################## Since x and y are both continuous (leptin and sCD14),
################################## I can simplify things by assuming Var(Xres) = Var(Yres) = 1/3.
################################## This allows me to more easily compute a p-value for the test
################################## that the rank correlation varies as a function of age.
result <- prodTS.new(x.resid, y.resid, z,
                     xres2.method = "constant", yres2.method = "constant",
                     xz.dl.dtheta, yz.dl.dtheta,
                     xz.d2l.dtheta.dtheta, yz.d2l.dtheta.dtheta,
                     dxresid.dtheta, dyresid.dtheta)
est.prod <- new.z %*% result$prod
est.xres2 <- 1/3
est.yres2 <- 1/3
est.gamma <- est.prod / sqrt(est.xres2 * est.yres2)

#### Wald test that partial rank correlation between leptin and sCD14 varies as a function of age
R <- matrix(c(1, 0, 0, 1), ncol = 2)
V <- result$var.coef[-1, -1]
1 - pchisq(t(R %*% mod.prod$coeff[-1]) %*% solve(R %*% V %*% R) %*% R %*% mod.prod$coeff[-1],
           df = 2)
anova(mod.prod)
##### Naive likelihood ratio test that doesn't account for uncertainty in PSRs
##### produces similar estimate to Wald test.
mod.prod.linear <- ols(prod ~ age, data)
anova(mod.prod.linear)

####### conditional on cd4 #####
z <- model.matrix(as.formula("cd14 ~ rcs(cd4,3) "), data = d)
z <- z[, -1]
xz.dl.dtheta <- score.xz$dl.dtheta
yz.dl.dtheta <- score.yz$dl.dtheta
xz.d2l.dtheta.dtheta <- score.xz$d2l.dtheta.dtheta
yz.d2l.dtheta.dtheta <- score.yz$d2l.dtheta.dtheta
dxresid.dtheta <- score.xz$dpresid.dtheta
dyresid.dtheta <- score.yz$dpresid.dtheta
prod <- x.resid * y.resid
xres2 <- x.resid^2
yres2 <- y.resid^2
data <- data.frame(cd4 = d$cd4, prod = prod, xres2 = xres2, yres2 = yres2)
mod.prod <- ols(prod ~ rcs(cd4, 3), data)
mod.xres2 <- ols(xres2 ~ rcs(cd4, 3), data)
mod.yres2 <- ols(yres2 ~ rcs(cd4, 3), data)
result <- prodTS.new(x.resid, y.resid, z,
                     xres2.method = "model", yres2.method = "model",
                     xz.dl.dtheta, yz.dl.dtheta,
                     xz.d2l.dtheta.dtheta, yz.d2l.dtheta.dtheta,
                     dxresid.dtheta, dyresid.dtheta)

new.cd4 <- c(1:150) * 10
new.z <- cbind(1, rcspline.eval(new.cd4, knots = mod.prod$Design$parms$cd4, inclx = TRUE))
est.prod <- new.z %*% result$prod
est.xres2 <- new.z %*% result$xres2
est.yres2 <- new.z %*% result$yres2
est.gamma <- est.prod / sqrt(est.xres2 * est.yres2)

## delta-method standard errors
dgamma.dprod <- apply(new.z, 2, FUN = function(x) 1 / sqrt(est.xres2 * est.yres2) * x)
dgamma.dxres2 <- apply(new.z, 2, FUN = function(x) -est.gamma / 2 / est.xres2 * x)
dgamma.dyres2 <- apply(new.z, 2, FUN = function(x) -est.gamma / 2 / est.yres2 * x)
dgamma.dcoef <- cbind(dgamma.dprod, dgamma.dxres2, dgamma.dyres2)
cov.gamma <- dgamma.dcoef %*% result$var.coef %*% t(dgamma.dcoef)
var.gamma <- diag(dgamma.dcoef %*% result$var.coef %*% t(dgamma.dcoef))
result.ci <- getCI(est.gamma, var.gamma, fisher = TRUE, ci = 0.95)

##### silverman's rule of thumb for Gaussian kernel : h = 1.06 * sigma * n^(-1/5)
h = 1.06 * sd(d$cd4) * dim(d)[1]^(-1/5)
h = 175   # overrides Silverman's bandwidth
result.kernel <- kernel.cor(cd14.p, leptin.p, d$cd4, kernel = "gaussian", h = h)
result.kernel <- result.kernel[!duplicated(result.kernel), ]
result.kernel <- result.kernel[order(result.kernel[, 1]), ]

par(mar = c(4, 2, 1.5, 3), mfrow = c(1, 1))
plot(new.cd4, est.gamma, cex = 0.1, type = "n", ylim = c(-0.3, 1), xlab = "CD4")
polygon(c(new.cd4, rev(new.cd4)), c(result.ci[, 1], rev(result.ci[, 2])),
        col = 'grey', border = NA)
points(new.cd4, est.gamma, cex = 0.1, type = "l")
points(new.cd4, result.ci[, 1], cex = 0.01, col = "grey")
points(new.cd4, result.ci[, 2], cex = 0.01, col = "grey")
lines(result.kernel, cex = 0.1, col = 1, lty = 2)
abline(h = cor(cd14.p, leptin.p), col = 1, lty = 3)
legend("topleft", col = c(1, 1, 1), cex = 0.7,
       legend = c("conditional (parametric)", "conditional (kernel)", "partial"),
       bty = "n", lty = c(1, 2, 3))

################################## Since x and y are both continuous (leptin and sCD14),
################################## I can simplify things by assuming Var(Xres) = Var(Yres) = 1/3.
################################## This allows me to more easily compute a p-value for the test
################################## that the rank correlation varies as a function of CD4.
result <- prodTS.new(x.resid, y.resid, z,
                     xres2.method = "constant", yres2.method = "constant",
                     xz.dl.dtheta, yz.dl.dtheta,
                     xz.d2l.dtheta.dtheta, yz.d2l.dtheta.dtheta,
                     dxresid.dtheta, dyresid.dtheta)
est.prod <- new.z %*% result$prod
est.xres2 <- 1/3
est.yres2 <- 1/3
est.gamma <- est.prod / sqrt(est.xres2 * est.yres2)

#### Wald test that partial rank correlation between leptin and sCD14 varies as a function of CD4
R <- matrix(c(1, 0, 0, 1), ncol = 2)
V <- result$var.coef[-1, -1]
1 - pchisq(t(R %*% mod.prod$coeff[-1]) %*% solve(R %*% V %*% R) %*% R %*% mod.prod$coeff[-1],
           df = 2)
anova(mod.prod)
##### Naive likelihood ratio test that doesn't account for uncertainty in PSRs
##### produces similar estimate to Wald test.
mod.prod.linear <- ols(prod ~ cd4, data)
anova(mod.prod.linear)