#### Calculation genome-wide overall power for a SNP chip.
#### Chun Li, 10/29/2007
#### The method is described in
#### Li et al. (2007) Evaluating cost efficiency of SNP chips in genome-wide association studies.
#### The calculation is based on the allele-based case-control association test.
#### The minor allele is treated as the risk allele.
#### Genotypes are denoted as 0, 1, 2, the number of copies of risk allele.
#### The disease prevalence and GRRs (or ORs) are assumed to be the same for all disease variants.
#### table: Input table for a SNP chip. The rows are intervals for max r2, partitioned evenly to
#### cover [0,1]. The columns are intervals for MAF, partitioned evenly to cover [0,.5].
#### prev: Disease prevalence
#### GRR: Genotype relative risk (see below)
#### RR1, RR2: Genotype relative risks for genotypes 1 and 2 compared to genotype 0.
#### If both are unspecified, use RR1=GRR and RR2=RR1*RR1.
#### If RR2 is unspecified, use RR2=RR1*RR1.
#### Both RR1 and RR2 should be >=1.
#### If unspecified, multiplicative model is used: RR2=RR1*RR1.
#### OR: Genotype odds ratio (see below)
#### OR1, OR2: Odds ratio for genotypes 1 and 2 compared to genotype 0.
#### If both are unspecified, use OR1=OR and OR2=OR1*OR1.
#### If OR2 is unspecified, use OR2=OR1*OR1.
#### Both OR1 and OR2 should be >=1.
#### N: Sample size (see below)
#### Ncase, Ncntl: Number of cases and controls.
#### If unspecified, use N.
#### alpha: Significance level
GWApower = function(table, prev, GRR=NULL, OR=NULL, N=NULL, alpha=0.05,
RR1=GRR, RR2=RR1*RR1, OR1=OR, OR2=OR1*OR1, Ncase=N, Ncntl=N) {
## R is unlike C. For code (A & B), R will evaluate both, while C will evaluate A
## first and evaluate B only when A is true.
## Because of this, I have to use nested if to avoid error messages.
if(is.null(RR1) & is.null(OR1)) {
## When both RR and OR are null: stop.
stop("Either relative risk or odds ratio needs to be specified.")
} else if(!is.null(RR1) & !is.null(OR1)) {
## When both RR and OR are non-null: if both are NA, stop; if both are not NA, use RR.
if(is.na(RR1) & is.na(OR1)) {
stop("Either relative risk or odds ratio needs to be specified.")
}
if(!is.na(RR1) & !is.na(OR1)) {
USE = "RR"
warning("Both relative risk and odds ratio are specified. Relative risks are used.")
OR = OR1 = OR2 = NULL
}
} else {
## When only one of RR and OR is non-null: use it
if(!is.null(RR1)) USE = "RR"
if(!is.null(OR1)) USE = "OR"
}
## check to ensure RRs>=1, ORs>=1, and prev is in (0,1)
if(USE == "RR") {
if(RR1<1 | RR2<1) {
stop("Relative risks must be greater than or equal to 1")
}
}
if(USE == "OR") {
if(OR1<1 | OR2<1) {
stop("Odds ratios must be greater than or equal to 1")
}
}
if(prev<=0 | prev>=1) {
stop("Prevalence must be between 0 and 1")
}
if(USE == "RR") {
message("prev=", prev, "; RR1=", RR1, ", RR2=", RR2, "; alpha=", alpha,
"; Ncase=", Ncase, ", Ncntl=", Ncntl)
}
if(USE == "OR") {
message("prev=", prev, "; OR1=", OR1, ", OR2=", OR2, "; alpha=", alpha,
"; Ncase=", Ncase, ", Ncntl=", Ncntl)
}
nrow = dim(table)[1]; ncol = dim(table)[2]
## translate the input table into a probability distribution
table = table/sum(table)
## set the initial value to 0 and cummulate over all cells of input table
cumpower = 0
for(i in 1:nrow) {
for(j in 1:ncol) {
maxr2 = i/nrow # maximum r2 for cell (i,j)
p = j/(2*ncol) # frequency of the minor and risk allele for cell (i,j)
## Pr(genotype) in population
popufreq = c((1-p)^2, 2*p*(1-p), p^2)
## Calculate penetrances if disease model is specified through OR
if(USE == "OR") {
ORsolve = function(p0) {
odds1 = OR1*p0/(1-p0); p1 = odds1/(1+odds1)
odds2 = OR2*p0/(1-p0); p2 = odds2/(1+odds2)
(sum(c(p0, p1, p2) * popufreq)/prev - 1)^2
}
pen0 = optimize(ORsolve, interval=c(0,1), tol=1e-10)$minimum
odds1 = OR1*pen0/(1-pen0); pen1 = odds1/(1+odds1)
odds2 = OR2*pen0/(1-pen0); pen2 = odds2/(1+odds2)
}
## Calculate penetrances if disease model is specified through RR
if(USE == "RR") {
pen0 = prev / sum(c(1,RR1,RR2) * popufreq)
pen1 = pen0 * RR1
pen2 = pen0 * RR2
## Check if all three penetrances are in [0, 1].
## Because we require RR1>=1 and RR2>=1, we have 0 < pen0 <= prev.
## Then, as long as RR<=1/prev, then pen1 and pen2 are <=1.
## If there is a possibility that pen1 and pen2 are >1, reset
## their values and re-calculate prevalence.
if(max(RR1,RR2) > 1/prev) {
pen1 = min(1, pen1)
pen2 = min(1, pen2)
prev = sum(c(pen0, pen1, pen2) * popufreq)
}
}
## Pr(genotype | case) and Pr(genotype | control).
casefreq = c(pen0, pen1, pen2) * popufreq / prev
cntlfreq = (1 - c(pen0, pen1, pen2)) * popufreq / (1-prev)
## Average observed allele counts in the long run.
## With sample size N at tag SNP with maximum r2, the effective sample size
## at disease variant is about N*maxr2.
caseobs = 2 * Ncase * maxr2 * c(sum(casefreq * c(1,.5,0)),
sum(casefreq * c(0,.5,1)))
cntlobs = 2 * Ncntl * maxr2 * c(sum(cntlfreq * c(1,.5,0)),
sum(cntlfreq * c(0,.5,1)))
## Expected allele counts.
## Rows are alleles 0 and 1; columns are case and control.
expected = (caseobs + cntlobs) %*% t(c(sum(caseobs), sum(cntlobs))) /
sum(caseobs + cntlobs)
## Non-centrality parameter for the allelic chi-squared test.
noncen = sum((c(caseobs, cntlobs) - as.numeric(expected))^2 /
as.numeric(expected))
## Power for the cell and cummulated power.
cellpower = 1 - pchisq(qchisq(1-alpha,1), 1, ncp=noncen)
cumpower = cumpower + table[i,j] * cellpower
}
}
cumpower
}
#### example function call
#CEUtableI550K = as.matrix(read.table("CEUtableI550K.txt"))
#GWApower(CEUtableI550K, prev=0.05, GRR=1.4, N=2000, alpha=1e-7)
#GWApower(CEUtableI550K, prev=0.05, RR1=1.5, RR2=1.5, N=2000, alpha=1e-7)
#GWApower(CEUtableI550K, prev=0.05, OR=1.4, N=2000, alpha=1e-7)
#GWApower(CEUtableI550K, prev=0.05, OR1=1.5, OR2=1.5, N=2000, alpha=1e-7)