## This is R code for calculating HWE p-value using trio data. ## NOTE: ALL OFFSPRING OF THE TRIOS ARE ASSUMED TO BE AFFECTED. ## The method is detailed in ## Li M, Li C (2008) Assessing departure from Hardy-Weinberg equilibrium ## in the presence of disease association. Genetic Epidemiology 32:589-599 ## (PMID: 18449919) ## Chun Li (chun.li@vanderbilt.edu) ## 04/29/2009 ## The argument for the function HWETRIO() should be a vector of 10 ## integers. They are the number of trios of the 10 possible trio ## genotypes, in the following order: ## (0x0-0), (0x1-0), (0x1-1), (0x2-1), (1x1-0), (1x1-1), (1x1-2), ## (1x2-1), (1x2-2), (2x2-2) ## Here, 0, 1, 2 are genotypes (representing # of allele 1). ## Example: ## For the above 10 trio types, the counts are ## 12 26 19 37 9 36 10 16 23 12 ## Then the p-value can be obtained by calling ## HWETRIO(c(12, 26, 19, 37, 9, 36, 10, 16, 23, 12)) HWETRIO <- function(triocounts) { n1 = triocounts[1] n2 = triocounts[2] n3 = triocounts[3] n4 = triocounts[4] n5 = triocounts[5] n6 = triocounts[6] n7 = triocounts[7] n8 = triocounts[8] n9 = triocounts[9] n10 = triocounts[10] ## likelihood under null (r1, r2, p) triolkhd0 = function(x) { ## allele frequencies: p = Pr(allele 1), q = Pr(allele 0) ## relative risks: r1 = f1/f0, r2 = f2/f0 r1 = x[1]; r2 = x[2] p = x[3]; q = 1 - p ## The order of 10 trio types is: (0x0-0), (0x1-0), (0x1-1), ## (0x2-1), (1x1-0), (1x1-1), (1x1-2), (1x2-1), (1x2-2), (2x2-2) triofreq = c(q^4, 2*q^3*p, 2*q^3*p, 2*q^2*p^2, q^2*p^2, 2*q^2*p^2, q^2*p^2, 2*q*p^3, 2*q*p^3, p^4) K = sum(triofreq * c(1,1,r1,r1,1,r1,r2,r1,r2,r2)) (4*n1 + 3*(n2+n3) + 2*(n4+n5+n6+n7) + n8 + n9) * log(q) + (n2+n3 + 2*(n4+n5+n6+n7) + 3*(n8+n9) + 4*n10) * log(p) + (n3 + n4 + n6 + n8) * log(r1) + (n7 + n9 + n10) * log(r2) - (n1+n2+n3+n4+n5+n6+n7+n8+n9+n10) * log(K) + (n2+n3+n4+n6+n8+n9) * log(2) } ## likelihood under alternative (r1, r2, P1, P2) triolkhd1 = function(x) { ## genotype frequencies: P0, P1, P2 ## relative risks: r1 = f1/f0, r2 = f2/f0 r1 = x[1]; r2 = x[2] P1 = x[3]; P2 = x[4]; P0 = 1 - P1 - P2 ## The order of 10 trio types is: (0x0-0), (0x1-0), (0x1-1), ## (0x2-1), (1x1-0), (1x1-1), (1x1-2), (1x2-1), (1x2-2), (2x2-2) triofreq = c(P0^2, P0*P1, P0*P1, 2*P0*P2, 0.25*P1^2, 0.5*P1^2, 0.25*P1^2, P1*P2, P1*P2, P2^2) K = sum(triofreq * c(1,1,r1,r1,1,r1,r2,r1,r2,r2)) (2*n1 + n2+n3+n4) * log(P0) + (n2+n3 + 2*(n5+n6+n7) + (n8+n9)) * log(P1) + (n4+n8+n9 + 2*n10) * log(P2) + (n3 + n4 + n6 + n8) * log(r1) + (n7 + n9 + n10) * log(r2) - (n1+n2+n3+n4+n5+n6+n7+n8+n9+n10) * log(K) + (n4-2*n5-n6-2*n7) * log(2) } ## Obtain the maximum likelihood under the null repeat { ## During optimization, the log() function can produce a warning ## message "NaNs produced". We remove them before assign("EE", 1, pos=1) suppressWarnings(tryCatch(assign("loglkhd0", optim(par=c(rexp(2), runif(1)), fn=triolkhd0, method="L-BFGS-B", lower=c(0,0,0), upper=c(1, Inf, Inf), control=list(fnscale=-1))$value), error = function(x) assign("EE", -1, pos=1))) if(EE == 1) break } ## Obtain the maximum likelihood under the alternative repeat { assign("EE", 1, pos=1) tt = runif(3); pp = tt/sum(tt) suppressWarnings(tryCatch(assign("loglkhd1", optim(par=c(rexp(2), pp[2:3]), fn=triolkhd1, method="L-BFGS-B", lower=c(0,0,0), upper=c(1, Inf, Inf), control=list(fnscale=-1))$value), error = function(x) assign("EE", -1, pos=1))) if(EE == 1) break } ## likelihood ratio test p-value 1-pchisq(2*(loglkhd1 - loglkhd0),1) }