#### Data analysis programms for testing for association between two #### ordinal variables while adjusting for covariates. #### #### Chun Li #### October 20, 2009 #### Chun Li and Bryan E. Shepherd (2010) Test of association between #### two ordinal variables while adjusting for covariates. Journal of #### the American Statistical Association, 105:612-620 ### 12/06/2010: Old version did not work for ordinal variables with 3 levels ### Change a line in ordinal.scores() from ### if(na > 1) diag(rowdiff[,-1]) = -1 ### to ### if(na > 1) rowdiff[cbind(1:(na-1),2:na)] = -1 #### This implementation of our methods assumes proportional odds #### relationship between y and Z and between x and Z. In principle, #### any other multinomial regression analysis can be used in place of #### proportional odds models. #### R library requirement: ### Design package is needed for the lrm() function. #### functions defined: ## diagn(): a modification of diag(), for consistent coding ## tau(): Goodman-Kruskal's gamma ## COBOT.stat(): called by COBOT.emp() ## COBOT.emp(): calculates p-values using empirical distributions ## ordinal.scores(): called by COBOT.scores() ## COBOT.scores(): calculates p-values using asymptotics #### Data analysis functions: ## COBOT.emp(data, Nemp=1000) ## COBOT.scores(data) ## ## COBOT.emp() calculates p-values using empirical distributions ## COBOT.scores() calculates p-values using asymptotics ## ## The argument "data" must be a list with three elements: ## y: a vector with integer values representing levels (1 to ny) ## x: a vector with integer values representing levels (1 to nx) ## z: a vector or matrix of numerical covariate values ## The lengths of y, x, z (or nrow(z) if matrix) must be the same. ## ## Subjects with missing data are removed by these functions. ## ## Variables y and x are recoded to so that if variable x (y) has nx (ny) ## categories, the categories are coded from 1 to nx (ny). ## ## For COBOT.emp(), two sets of p-values are estimated: ## Set a: proportion of abs(emp) >= abs(test.stat) ## Set b: two times the proportion of smaller tail #### This function is called from COBOT.emp(). #### Calculation of the three test statistics COBOT.stat <- function(data, nx0, ny0) { ## Ensure code works if called from somewhere else (not COBOT.emp()). ## Make z a matrix if it is a vector. This makes later coding consistent. if(!is.matrix(data\$z)) data\$z <- matrix(data\$z, ncol=1) tablex <- table(data\$x) tabley <- table(data\$y) nx <- length(tablex) ny <- length(tabley) nax <- nx - 1L nay <- ny - 1L nz <- ncol(data\$z) N <- length(data\$y) ## Fit separate proportional odds models under the null ## lrm() may fail if the slope is very close to zero. This is rare but ## needs to be taken care of. NAs are returned when this happens. ## polr() in MASS library seems to have problems too. ## modxz = lrm(x ~ z) ## modyz = lrm(y ~ z) EE <- FALSE .ErrorCatch <- function(e) { EE <<- TRUE e } modxz <- suppressWarnings(tryCatch(polr(factor(x) ~ z, data=data), error=.ErrorCatch)) if(EE) { warning(modxz) return(list(stat=NA, pred.probx=NULL, pred.proby=NULL)) } modyz <- suppressWarnings(tryCatch(polr(factor(y) ~ z, data=data), error=.ErrorCatch)) if(EE) { warning(modyz) return(list(stat=NA, pred.probx=NULL, pred.proby=NULL)) } ## Obtain predicted cumulative probabilities. Each subject is a row. px <- matrix(plogis(matrix(modxz\$zeta, nrow=N, ncol=nax, byrow=TRUE) - modxz\$lp),ncol=nax) py <- matrix(plogis(matrix(modyz\$zeta, nrow=N, ncol=nay, byrow=TRUE) - modyz\$lp),ncol=nay) sum.matrix <- crossprod(modyz\$fitted.values, modxz\$fitted.values) ## T1 is to contrast tau of observed and expected frequencies. T1 <- tau(table(data\$y,data\$x))\$tau - tau(sum.matrix)\$tau ## Select the cumulative hi/low probabilities from the empirical data ## compenstating for categories droped from the empirical data set. catx <- as.integer(names(tablex)) ## Find the nearest Index adding early and latex indexes expandIndex <- cummax(c(0L, match(seq_len(nx0 - 1L), catx, nomatch=0L), nx)) + 1L px <- cbind(0,px,1) modifiedIndex <- matrix(nrow=N, ncol=2L) modifiedIndex[,1L] <- seq_len(N) modifiedIndex[,2L] <- expandIndex[data\$x] ## components of residuals low.x <- px[modifiedIndex] modifiedIndex[,2L] <- expandIndex[data\$x + 1L] hi.x <- 1 - px[modifiedIndex] caty <- as.numeric(names(tabley)) expandIndex <- cummax(c(0L, match(seq_len(ny0 - 1L), caty, nomatch=0L), ny)) + 1L py <- cbind(0,py,1) modifiedIndex[,2L] <- expandIndex[data\$y] low.y <- py[modifiedIndex] modifiedIndex[,2L] <- expandIndex[data\$y + 1L] hi.y <- 1 - py[modifiedIndex] ## T2 is correlation of residuals T2 <- cor(hi.x - low.x, hi.y - low.y) Cprob <- low.x*low.y + hi.x*hi.y Dprob <- low.x*hi.y + hi.x*low.y Cmean <- mean(Cprob) Dmean <- mean(Dprob) ## T3 is Cmean-Dmean T3 <- Cmean - Dmean return(list(stat=c(T1,T2,T3), pred.probx=modxz\$fitted.values, pred.proby=modyz\$fitted.values)) } #### P-value estimation for the three test statistics using empirical #### distributions. Two sets of p-values are estimated: #### Set a: proportion of abs(emp) >= abs(test.stat) #### Set b: two times the proportion of smaller tail COBOT.emp <- function(data, Nemp=1000) { ## Make z a matrix if it is a vector. This makes later coding consistent. if(!is.matrix(data\$z)) data\$z <- matrix(data\$z, ncol=1L) ## Check missing data. exclude <- is.na(data\$y) | is.na(data\$x) | rowSums(is.na(data\$z)) ## Exclude subjects with missing data. ## Ensure categories are coded from 1 to ny and from 1 to nx. data\$y <- as.integer(as.factor(data\$y[!exclude])) data\$x <- as.integer(as.factor(data\$x[!exclude])) data\$z <- data\$z[!exclude,, drop=FALSE] nx <- length(table(data\$x)) ny <- length(table(data\$y)) nxny <- nx*ny nxnym1 <- nxny - 1L N <- length(data\$y) test.stat <- COBOT.stat(data, nx, ny) ## cum.mult.prob has cummalative probabilities cum.mult.prob <- t(vapply(split(test.stat\$pred.proby[,rep(seq_len(ny), times=nx)[-nxny]] * test.stat\$pred.probx[,rep(seq_len(nx), each=ny)[-nxny]], seq_len(N)), cumsum, numeric(nxnym1))) ## Generate empirical distribution of the test statistic under the null ## First, generate data using the product predicted probabilities aa <- matrix(runif(N*Nemp), N) xyemp <- matrix(as.integer(rowSums(array(aa[,rep.int(seq_len(Nemp), times=nxnym1)] > cum.mult.prob[,rep(seq_len(nxnym1), each=Nemp)], dim=c(N, Nemp, nxnym1)), dims=2L)), nrow=N, ncol=Nemp) ## array(paste(col(aa)[,rep(seq_len(15), times=nxnym1)], ## col(cum.mult.prob)[,rep(seq_len(nxnym1), each=15)], ## row(aa)[,rep(seq_len(15), times=nxnym1)], ## row(cum.mult.prob)[,rep(seq_len(nxnym1), each=15)]), ## dim=c(N, 15, nxnym1))[350, 1:10, 1:10] xemp <- xyemp %/% ny + 1L yemp <- xyemp %% ny + 1L test.stat.emp <- matrix(,3,Nemp) for(j in seq_len(Nemp)) { test.stat.emp[,j] = COBOT.stat(list(x=xemp[,j], y=yemp[,j], z=data\$z), nx, ny)\$stat } ## Obtain p-values. Since test.stat.emp may have NAs, apply() is ## used instead of the faster matrix multiplication. ## pvala = ((abs(test.stat\$stat) <= abs(test.stat.emp)) %*% rep(1,Nemp))/Nemp ## tmp = ((test.stat\$stat <= test.stat.emp) %*% rep(1,Nemp))/Nemp pvala <- rowMeans(abs(test.stat2\$stat) <= abs(test.stat.emp), na.rm=TRUE) tmp <- rowMeans(test.stat2\$stat <= test.stat.emp, na.rm=TRUE) pvalb <- 2*pmin(tmp, 1-tmp) return(c(pvala, pvalb)) } #### This function is called from COBOT.scores(). #### It calculates all values needed for estimating equations. ordinal.scores2 = function(y, X) { ## y is a numeric vector ## X is a vector or matrix with one or more columns. # require(Design) ## for the lrm() function ## Ensure code works if called from somewhere else (not COBOT.scores()). ## Make X a matrix if it is a vector. This makes later coding consistent. if(!is.matrix(X)) X = matrix(X, ncol=1) ## N: number of subjects; ny: number of y categories N = length(y) ny = length(table(y)) ## na, nb: number of parameters in alpha and beta na = ny - 1 nb = ncol(X) npar = na + nb ## Fit proportional odds model and obtain the MLEs of parameters. mod = newpolr(factor(y) ~ X, Hess=TRUE) alpha = mod\$zeta beta = coef(mod) eta <- mod\$lp ## Predicted probabilities p0 and dp0.dtheta are stored for individuals. p0 = mod\$fitted.values dp0.dtheta = mod\$dfitted.values ## Cumulative probabilities Gamma = mod\$cumpr dcumpr <- mod\$dcumpr ## Scores are stored for individuals. dl.dtheta = mod\$grad d2l.dtheta.dtheta <- mod\$Hessian list(mod = mod, dl.dtheta = dl.dtheta, d2l.dtheta.dtheta = d2l.dtheta.dtheta, p0 = p0, dp0.dtheta = dp0.dtheta, Gamma = Gamma, dcumpr=dcumpr) } #### P-value estimation for the three test statistics using M-estimation. COBOT.scores2 = function(data) { ## Make z a matrix if it is a vector. This makes later coding consistent. if(!is.matrix(data\$z)) data\$z = matrix(data\$z, ncol=1) ## Check missing data. exclude = is.na(data\$y) | is.na(data\$x) | apply(data\$z, 1, function(x) sum(is.na(x))) ## Exclude subjects with missing data. ## Ensure categories are coded from 1 to ny and from 1 to nx. data\$y = as.numeric(as.factor(data\$y[!exclude])) data\$x = as.numeric(as.factor(data\$x[!exclude])) data\$z = data\$z[!exclude, ] ## This is ensure data\$z is a matrix. The above statement will make ## data\$z a vector if ncol=1. if(!is.matrix(data\$z)) data\$z = matrix(data\$z, ncol=1) score.xz = ordinal.scores2(data\$x, data\$z) score.yz = ordinal.scores2(data\$y, data\$z) npar.xz = dim(score.xz\$dl.dtheta)[2] npar.yz = dim(score.yz\$dl.dtheta)[2] xx = data\$x; yy = data\$y zz <- data\$z nx = length(table(xx)) ny = length(table(yy)) N = length(yy) #### Asymptotics for T3 = mean(Cprob) - mean(Dprob) ## If gamma.x[0]=0 and gamma.x[nx]=1, then ## low.x = gamma.x[x-1], hi.x = (1-gamma.x[x]) low.x = cbind(0, score.xz\$Gamma)[cbind(1:N, xx)] low.y = cbind(0, score.yz\$Gamma)[cbind(1:N, yy)] hi.x = cbind(1-score.xz\$Gamma, 0)[cbind(1:N, xx)] hi.y = cbind(1-score.yz\$Gamma, 0)[cbind(1:N, yy)] Cprob = low.x*low.y + hi.x*hi.y Dprob = low.x*hi.y + hi.x*low.y mean.Cprob = mean(Cprob) mean.Dprob = mean(Dprob) T3 = mean.Cprob - mean.Dprob dcumpr.xz <- cbind(0, score.xz\$dcumpr, 0) dcumpr.yz <- cbind(0, score.yz\$dcumpr, 0) dlowx.dthetax <- cbind((col(score.xz\$dcumpr) == (xx - 1L)) * score.xz\$dcumpr, dcumpr.xz[cbind(1:N,xx)] * zz) dlowy.dthetay <- cbind((col(score.yz\$dcumpr) == (yy - 1L)) * score.yz\$dcumpr, dcumpr.yz[cbind(1:N,yy)] * zz) dhix.dthetax <- -cbind((col(score.xz\$dcumpr) == xx) * score.xz\$dcumpr, dcumpr.xz[cbind(1:N,xx + 1L)] * zz) dhiy.dthetay <- -cbind((col(score.yz\$dcumpr) == yy) * score.yz\$dcumpr, dcumpr.yz[cbind(1:N,yy + 1L)] * zz) dCsum.dthetax <- crossprod(dlowx.dthetax, low.y) + crossprod(dhix.dthetax, hi.y) dCsum.dthetay <- crossprod(dlowy.dthetay, low.x) + crossprod(dhiy.dthetay, hi.x) dDsum.dthetax <- crossprod(dlowx.dthetax, hi.y) + crossprod(dhix.dthetax, low.y) dDsum.dthetay <- crossprod(dlowy.dthetay, hi.x) + crossprod(dhiy.dthetay, low.x) dT3sum.dtheta = c(dCsum.dthetax-dDsum.dthetax, dCsum.dthetay-dDsum.dthetay) ## Estimating equations for (theta, p3) ## theta is (theta.xz, theta.yz) and the equations are score functions. ## p3 is the "true" value of test statistic, and the equation is ## p3 - (Ci-Di) bigphi = cbind(score.xz\$dl.dtheta, score.yz\$dl.dtheta, T3-(Cprob-Dprob)) ## sandwich variance estimate of var(thetahat) Ntheta = npar.xz + npar.yz + 1 A = matrix(0,Ntheta,Ntheta) A[1:npar.xz, 1:npar.xz] = score.xz\$d2l.dtheta.dtheta A[npar.xz+(1:npar.yz), npar.xz+(1:npar.yz)] = score.yz\$d2l.dtheta.dtheta A[Ntheta, -Ntheta] = -dT3sum.dtheta A[Ntheta, Ntheta] = N ## One way of coding: ##B = t(bigphi) %*% bigphi ##var.theta = solve(A) %*% B %*% solve(A) ## Suggested coding for better efficiency and accuracy: SS = solve(A, t(bigphi)) var.theta = tcrossprod(SS, SS) varT3 = var.theta[Ntheta, Ntheta] pvalT3 = 2 * pnorm(-abs(T3)/sqrt(varT3)) #### Asymptotics for T4 = (mean(Cprob)-mean(Dprob))/(mean(Cprob)+mean(Dprob)) T4 = (mean.Cprob - mean.Dprob)/(mean.Cprob + mean.Dprob) ## Estimating equations for (theta, P4) ## theta is (theta.xz, theta.yz) and the equations are score functions. ## P4 is a vector of (cc, dd, p4). Their corresponding equations are: ## cc - Ci ## dd - Di ## p4 - (cc-dd)/(cc+dd) ## Then p4 is the "true" value of test statistic. bigphi = cbind(score.xz\$dl.dtheta, score.yz\$dl.dtheta, mean.Cprob - Cprob, mean.Dprob - Dprob, 0) ## sandwich variance estimate of var(thetahat) Ntheta = npar.xz + npar.yz + 3 A = matrix(0,Ntheta,Ntheta) A[1:npar.xz, 1:npar.xz] = score.xz\$d2l.dtheta.dtheta A[npar.xz+(1:npar.yz), npar.xz+(1:npar.yz)] = score.yz\$d2l.dtheta.dtheta A[Ntheta-3+(1:3), Ntheta-3+(1:3)] = diag(N, 3) A[Ntheta-2, 1:(npar.xz+npar.yz)] = -c(dCsum.dthetax, dCsum.dthetay) A[Ntheta-1, 1:(npar.xz+npar.yz)] = -c(dDsum.dthetax, dDsum.dthetay) revcpd = 1/(mean.Cprob + mean.Dprob) dT4.dcpd = (mean.Cprob-mean.Dprob)*(-revcpd^2) A[Ntheta, Ntheta-3+(1:2)] = -N * c(revcpd+dT4.dcpd, -revcpd+dT4.dcpd) ## One way of coding: ##B = t(bigphi) %*% bigphi ##var.theta = solve(A) %*% B %*% solve(A) ## Suggested coding for better efficiency and accuracy: SS = solve(A, t(bigphi)) var.theta = tcrossprod(SS, SS) varT4 = var.theta[Ntheta, Ntheta] pvalT4 = 2 * pnorm(-abs(T4)/sqrt(varT4)) #### Asymptotics for T2 = cor(hi.x - low.x, hi.y - low.y) xresid = hi.x - low.x yresid = hi.y - low.y T2 = cor(xresid, yresid) xbyyresid = xresid * yresid xresid2 = xresid^2 yresid2 = yresid^2 mean.xresid = mean(xresid) mean.yresid = mean(yresid) mean.xbyyresid = mean(xbyyresid) ## T2 also equals numT2 / sqrt(varprod) = numT2 * revsvp numT2 = mean.xbyyresid - mean.xresid * mean.yresid var.xresid = mean(xresid2) - mean.xresid^2 var.yresid = mean(yresid2) - mean.yresid^2 varprod = var.xresid * var.yresid revsvp = 1/sqrt(varprod) dT2.dvarprod = numT2 * (-0.5) * revsvp^3 ## Estimating equations for (theta, P5) ## theta is (theta.xz, theta.yz) and the equations are score functions. ## P5 is a vector (ex, ey, crossxy, ex2, ey2, p5). ## Their corresponding equations are: ## ex - (hi.x-low.x)[i] ## ey - (hi.y-low.y)[i] ## crossxy - ((hi.x-low.x)*(hi.y-low.y))[i] ## ex2 - (hi.x-low.x)[i]^2 ## ey2 - (hi.y-low.y)[i]^2 ## p5 - (crossxy-ex*ey)/sqrt((ex2-ex^2)*(ey2-ey^2)) ## Then p5 is the "true" value of test statistic bigphi = cbind(score.xz\$dl.dtheta, score.yz\$dl.dtheta, mean.xresid - xresid, mean.yresid - yresid, mean.xbyyresid - xbyyresid, mean(xresid2) - xresid2, mean(yresid2) - yresid2, 0) ## sandwich variance estimate of var(thetahat) Ntheta = npar.xz + npar.yz + 6 A = matrix(0,Ntheta,Ntheta) A[1:npar.xz, 1:npar.xz] = score.xz\$d2l.dtheta.dtheta A[npar.xz+(1:npar.yz), npar.xz+(1:npar.yz)] = score.yz\$d2l.dtheta.dtheta A[Ntheta-6+(1:6), Ntheta-6+(1:6)] = diag(N, 6) dxresid.dthetax = dhix.dthetax - dlowx.dthetax dyresid.dthetay = dhiy.dthetay - dlowy.dthetay bigpartial = rbind(c(crossprod(dxresid.dthetax, rep(1, N)), rep(0, npar.yz)), c(rep(0, npar.xz), crossprod(dyresid.dthetay, rep(1, N))), c(crossprod(dxresid.dthetax, yresid), crossprod(dyresid.dthetay, xresid)), c(crossprod(dxresid.dthetax, (2*xresid)), rep(0, npar.yz)), c(rep(0, npar.xz), crossprod(dyresid.dthetay, (2*yresid)))) A[Ntheta-6+(1:5), 1:(npar.xz+npar.yz)] = -bigpartial smallpartial = N * c(-mean.yresid * revsvp + dT2.dvarprod * (-2*mean.xresid*var.yresid), -mean.xresid * revsvp + dT2.dvarprod * (-2*mean.yresid*var.xresid), revsvp, dT2.dvarprod * var.yresid, dT2.dvarprod * var.xresid) A[Ntheta, Ntheta-6+(1:5)] = -smallpartial ## One way of coding: ##B = t(bigphi) %*% bigphi ##var.theta = solve(A) %*% B %*% solve(A) ## Suggested coding for better efficiency and accuracy: SS = solve(A, t(bigphi)) var.theta = tcrossprod(SS, SS) varT2 = var.theta[Ntheta, Ntheta] pvalT2 = 2 * pnorm(-abs(T2)/sqrt(varT2)) #### Asymptotics for T1 = tau - tau0 ## dtau0/dtheta ## P0 is the sum of product predicted probability matrix with dim(nx,ny) ## P0 = crossprod(score.xz\$p0, score.yz\$p0) / N ## cdtau0 = tau(P0) ## C0 = cdtau0\$scon ## D0 = cdtau0\$sdis ## ## C0 = sum_{l>j,m>k} {P0[j,k] * P0[l,m]} ## ## D0 = sum_{l>j,m1 & j>1, sum(P0[1:(i-1), 1:(j-1)]), 0) + ## ifelse(i1 & j1, sum(P0[(i+1):nx, 1:(j-1)]), 0) ## } ## ## tau0 = (C0-D0)/(C0+D0) ## dtau0.dC0 = 2*D0/(C0+D0)^2 ## dtau0.dD0 =-2*C0/(C0+D0)^2 ## ## P0 is already a matrix ## dP0.dtheta.x = array(0, c(nx, ny, npar.xz)) ## for(j in 1:ny) { ## aa = matrix(0, nx, npar.xz) ## for(i in 1:N) ## aa = aa + score.xz\$dp0.dtheta[i,,] * score.yz\$p0[i,j] ## dP0.dtheta.x[,j,] = aa/N ## ## simpler but mind-tickling version ## #dP0.dtheta.x[,j,] = (score.yz\$p0[,j] %*% matrix(score.xz\$dp0.dtheta,N))/N ## } ## dP0.dtheta.y = array(0, c(nx, ny, npar.yz)) ## for(j in 1:nx) { ## aa = matrix(0, ny, npar.yz) ## for(i in 1:N) ## aa = aa + score.yz\$dp0.dtheta[i,,] * score.xz\$p0[i,j] ## dP0.dtheta.y[j,,] = aa/N ## } ## ## dC0.dtheta and dD0.dtheta ## dC0.dtheta.x = as.numeric(dC0.dP0) %*% matrix(dP0.dtheta.x, nx*ny) ## dD0.dtheta.x = as.numeric(dD0.dP0) %*% matrix(dP0.dtheta.x, nx*ny) ## dC0.dtheta.y = as.numeric(dC0.dP0) %*% matrix(dP0.dtheta.y, nx*ny) ## dD0.dtheta.y = as.numeric(dD0.dP0) %*% matrix(dP0.dtheta.y, nx*ny) ## ## dtau0/dtheta ## dtau0.dtheta.x = dtau0.dC0 * dC0.dtheta.x + dtau0.dD0 * dD0.dtheta.x ## dtau0.dtheta.y = dtau0.dC0 * dC0.dtheta.y + dtau0.dD0 * dD0.dtheta.y ## ## dtau/dPa ## ## tau = (C-D)/(C+D) ## Pa = table(data\$x, data\$y) / N ## cdtau = tau(Pa) ## C = cdtau\$scon ## D = cdtau\$sdis ## dtau.dC = 2*D/(C+D)^2 ## dtau.dD =-2*C/(C+D)^2 ## ## Pa[nx,ny] is not a parameter, but = 1 - all other Pa parameters. ## ## Thus, d.Pa[nx,ny]/d.Pa[i,j] = -1. ## ## Also, d.sum(Pa[-nx,-ny]).dPa[i,j] = 1 when ij,m>k} {Pa[j,k] * Pa[l,m]}, Pa[i,j] appears in ## ## Pa[i,j] * XX (minus Pa[nx,ny] if ij,m1 & j>1, sum(Pa[1:(i-1), 1:(j-1)]), 0) + ## ifelse(i1 & j1, sum(Pa[(i+1):nx, 1:(j-1)]), 0) ## } ## dtau.dPa = dtau.dC * dC.dPa + dtau.dD * dD.dPa ## dtau.dPa = dtau.dPa[-length(dtau.dPa)] ## remove the last value ## ## Estimating equations for (theta, phi) ## ## theta is (theta.xz, theta.yz) and the equations are score functions. ## ## phi is (p_ij) for (X,Y), and the equations are ## ## I{subject in cell (ij)} - p_ij ## phi.Pa = matrix(0, N, nx*ny) ## phi.Pa[cbind(1:N, xx+(yy-1)*nx)] = 1 ## phi.Pa = phi.Pa - rep(1,N) %o% as.numeric(Pa) ## phi.Pa = phi.Pa[,-(nx*ny)] ## bigphi = cbind(score.xz\$dl.dtheta, score.yz\$dl.dtheta, phi.Pa) ## ## sandwich variance estimate of var(thetahat, phihat) ## Ntheta = npar.xz + npar.yz + nx*ny-1 ## A = matrix(0,Ntheta,Ntheta) ## A[1:npar.xz, 1:npar.xz] = score.xz\$d2l.dtheta.dtheta ## A[npar.xz+(1:npar.yz), npar.xz+(1:npar.yz)] = score.yz\$d2l.dtheta.dtheta ## A[-(1:(npar.xz+npar.yz)), -(1:(npar.xz+npar.yz))] = -diag(N, nx*ny-1) ## ## One way of coding: ## ##B = t(bigphi) %*% bigphi ## ##var.theta = solve(A) %*% B %*% solve(A) ## ## Suggested coding for better efficiency and accuracy: ## ##SS = solve(A, t(bigphi)) ## ##var.theta = SS %*% t(SS) ## ## Or better yet, no need to explicitly obtain var.theta. See below. ## ## Test statistic T1 = tau - tau0 ## T1 = cdtau\$tau - cdtau0\$tau ## ## dT.dtheta has length nx + ny + nx*ny-1 ## dT1.dtheta = c(-dtau0.dtheta.x, -dtau0.dtheta.y, dtau.dPa) ## ## variance of T, using delta method ## ##varT = t(dT.dtheta) %*% var.theta %*% dT.dtheta ## SS = crossprod(dT1.dtheta, solve(A, t(bigphi))) ## varT1 = sum(SS^2) ## pvalT1 = 2 * pnorm(-abs(T1)/sqrt(varT1)) list(pvalT2=pvalT2, varT2=varT2, T2=T2, pvalT3=pvalT3, varT3=varT3, T3=T3) }