#### Calculation genome-wide overall power for a SNP chip.
#### Chun Li, 10/29/2007
#### The method is described in 
#### Li et al. (2007) Evaluating cost efficiency of SNP chips in genome-wide association studies.

#### The calculation is based on the allele-based case-control association test.
#### The minor allele is treated as the risk allele.
#### Genotypes are denoted as 0, 1, 2, the number of copies of risk allele.
#### The disease prevalence and GRRs (or ORs) are assumed to be the same for all disease variants.

#### table: Input table for a SNP chip.  The rows are intervals for max r2, partitioned evenly to
####        cover [0,1].  The columns are intervals for MAF, partitioned evenly to cover [0,.5].  
#### prev:  Disease prevalence
#### GRR:   Genotype relative risk (see below)
#### RR1, RR2:  Genotype relative risks for genotypes 1 and 2 compared to genotype 0.
####            If both are unspecified, use RR1=GRR and RR2=RR1*RR1.
####            If RR2 is unspecified, use RR2=RR1*RR1.
####            Both RR1 and RR2 should be >=1.
####            If unspecified, multiplicative model is used: RR2=RR1*RR1.
#### OR:    Genotype odds ratio (see below)
#### OR1, OR2:  Odds ratio for genotypes 1 and 2 compared to genotype 0.
####            If both are unspecified, use OR1=OR and OR2=OR1*OR1.
####            If OR2 is unspecified, use OR2=OR1*OR1.
####            Both OR1 and OR2 should be >=1.
#### N:     Sample size (see below)
#### Ncase, Ncntl:  Number of cases and controls.
####            If unspecified, use N.
#### alpha: Significance level

GWApower2 = function(table, prev, GRR=NULL, OR=NULL, N=NULL, alpha=0.05,
                    RR1=GRR, RR2=RR1*RR1, OR1=OR, OR2=OR1*OR1, Ncase=N, Ncntl=N) {
  ## R is unlike C.  For code (A & B), R will evaluate both, while C will evaluate A
  ## first and evaluate B only when A is true.
  ## Because of this, I have to use nested if to avoid error messages.

  if(is.null(RR1) & is.null(OR1)) {
  ## When both RR and OR are null: stop.
    stop("Either relative risk or odds ratio needs to be specified.")
  } else if(!is.null(RR1) & !is.null(OR1)) {
  ## When both RR and OR are non-null: if both are NA, stop; if both are not NA, use RR.
    if(is.na(RR1) & is.na(OR1)) {
      stop("Either relative risk or odds ratio needs to be specified.")
    }
    if(!is.na(RR1) & !is.na(OR1)) {
      USE = "RR"
      warning("Both relative risk and odds ratio are specified.  Relative risks are used.")
      OR = OR1 = OR2 = NULL
    }
  } else {
  ## When only one of RR and OR is non-null: use it
    if(!is.null(RR1)) USE = "RR"
    if(!is.null(OR1)) USE = "OR"
  }

  ## check to ensure RRs>=1, ORs>=1, and prev is in (0,1)
  if(USE == "RR") {
    if(RR1<1 | RR2<1) {
      stop("Relative risks must be greater than or equal to 1")
    }
  }
  if(USE == "OR") {
    if(OR1<1 | OR2<1) {
      stop("Odds ratios must be greater than or equal to 1")
    }
  }
  if(prev<=0 | prev>=1) {
    stop("Prevalence must be between 0 and 1")
  }

  if(USE == "RR") {
    message("prev=", prev, "; RR1=", RR1, ", RR2=", RR2, "; alpha=", alpha,
            "; Ncase=", Ncase, ", Ncntl=", Ncntl)
  }
  if(USE == "OR") {
    message("prev=", prev, "; OR1=", OR1, ", OR2=", OR2, "; alpha=", alpha,
            "; Ncase=", Ncase, ", Ncntl=", Ncntl)
  }

  nrow = dim(table)[1]; ncol = dim(table)[2]
  ## translate the input table into a probability distribution
  table = table/sum(table)

  ## set the initial value to 0 and cummulate over all cells of input table
  cumpower = 0
  for(i in 1:nrow) {
    for(j in 1:ncol) {
      maxr2 = i/nrow  # maximum r2 for cell (i,j)
      p = j/(2*ncol)  # frequency of the minor and risk allele for cell (i,j)
      ## Pr(genotype) in population
      popufreq = c((1-p)^2, 2*p*(1-p), p^2)

      ## Calculate penetrances if disease model is specified through OR
      if(USE == "OR") {
        ORsolve = function(p0) {
          odds1 = OR1*p0/(1-p0); p1 = odds1/(1+odds1)
          odds2 = OR2*p0/(1-p0); p2 = odds2/(1+odds2)
          (sum(c(p0, p1, p2) * popufreq)/prev - 1)^2
        }
        pen0 = optimize(ORsolve, interval=c(0,1), tol=1e-10)$minimum
        odds1 = OR1*pen0/(1-pen0); pen1 = odds1/(1+odds1)
        odds2 = OR2*pen0/(1-pen0); pen2 = odds2/(1+odds2)
      }

      ## Calculate penetrances if disease model is specified through RR
      if(USE == "RR") {
        pen0 = prev / sum(c(1,RR1,RR2) * popufreq)
        pen1 = pen0 * RR1
        pen2 = pen0 * RR2

        ## Check if all three penetrances are in [0, 1].
        ## Because we require RR1>=1 and RR2>=1, we have 0 < pen0 <= prev.
        ## Then, as long as RR<=1/prev, then pen1 and pen2 are <=1.
        ## If there is a possibility that pen1 and pen2 are >1, reset
        ## their values and re-calculate prevalence.
        if(max(RR1,RR2) > 1/prev) {
          pen1 = min(1, pen1)
          pen2 = min(1, pen2)
          prev = sum(c(pen0, pen1, pen2) * popufreq)
        }
      }

      ## Pr(genotype | case) and Pr(genotype | control).
      casefreq = c(pen0, pen1, pen2) * popufreq / prev
      cntlfreq = (1 - c(pen0, pen1, pen2)) * popufreq / (1-prev)

      ## Average observed allele counts in the long run.
      ## With sample size N at tag SNP with maximum r2, the effective sample size
      ## at disease variant is about N*maxr2.
      caseobs = 2 * Ncase * maxr2 * c(sum(casefreq * c(1,.5,0)),
                                      sum(casefreq * c(0,.5,1)))
      cntlobs = 2 * Ncntl * maxr2 * c(sum(cntlfreq * c(1,.5,0)),
                                      sum(cntlfreq * c(0,.5,1)))
      ## Expected allele counts.
      ## Rows are alleles 0 and 1; columns are case and control.
      expected = (caseobs + cntlobs) %*% t(c(sum(caseobs), sum(cntlobs))) /
                   sum(caseobs + cntlobs)

      ## Non-centrality parameter for the allelic chi-squared test.
      noncen = sum((c(caseobs, cntlobs) - as.numeric(expected))^2 /
                     as.numeric(expected))
      ## Power for the cell and cummulated power.
      cellpower = 1 - pchisq(qchisq(1-alpha,1), 1, ncp=noncen)
      cumpower = cumpower + table[i,j] * cellpower
    }
  }
  cumpower
}


#### example function call
#CEUtableI550K = as.matrix(read.table("CEUtableI550K.txt"))
#GWApower2(CEUtableI550K, prev=0.05, GRR=1.4, N=2000, alpha=1e-7)
#GWApower2(CEUtableI550K, prev=0.05, RR1=1.5, RR2=1.5, N=2000, alpha=1e-7)
#GWApower2(CEUtableI550K, prev=0.05, OR=1.4, N=2000, alpha=1e-7)
#GWApower2(CEUtableI550K, prev=0.05, OR1=1.5, OR2=1.5, N=2000, alpha=1e-7)