Osaka City University, Medical Statistics

## Last updated: 2021-11-11 14:40
library(rms)

1 Simple t test

Let’s write a function to simulate data from \(Normal(\mu_1,\sigma_1)\) and \(Normal(\mu_2,\sigma_2)\) and conduct a t-test for \[\begin{align*} H_0: \mu_1 = \mu_2\\ H_1: \mu_1 > \mu_2 \end{align*}\]

  1. Inputs are n1, mu1, sig1, n2, mu2, sig2, and B. (n1 and n2 are the sample sizes, and B is the simulation size.)

  2. Output is the p-value.

sim1a <- function(n1, mu1, sig1, n2=n1, mu2, sig2, B){
    # n1 and n2 are sample sizes.
    # two-sample t-test, one-sided alternative.
    pv <- NULL
    for(i in 1:B){  ## or seq_len(B)
    ## Generate data using rnorm().
      x1 <- rnorm(n1,mu1,sig1)
      x2 <- rnorm(n2,mu2,sig2)
    ## Conduct t-test using t.test(... ..., alt='greater').
    ## Retain the p-value.
      out <- t.test(x1,x2, alt='greater')$p.value
    pv <- append(pv, out)
    }
    pv
}

Let’s assume \(\sigma_1 = \sigma_2 = 20\), \(\mu_1=\mu_2=100\) under \(H_0\) and \(\mu_1=105\), \(\mu_2=100\) under \(H_1\).

sim1aNul <- sim1a(n1=120, mu1=100, sig1=20, mu2=100, sig2=20, B=1000)
sim1aAlt <- sim1a(n1=120, mu1=105, sig1=20, mu2=100, sig2=20, B=1000)

To compute the type I error rate and the power, we can count how often the one-sided p-value falls below 0.025:

table( sim1aNul < 0.025 )
## 
## FALSE  TRUE 
##   983    17
table( sim1aAlt < 0.025 )
## 
## FALSE  TRUE 
##   542   458
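
Equivalently, the empirical type I error rate and power are just the proportions of rejections (the TRUE counts divided by B):

mean(sim1aNul < 0.025)  ## empirical type I error rate
mean(sim1aAlt < 0.025)  ## empirical power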
power.t.test(n=120, delta=5, sd=20, power=NULL)
## 
##      Two-sample t test power calculation 
## 
##               n = 120
##           delta = 5
##              sd = 20
##       sig.level = 0.05
##           power = 0.48752
##     alternative = two.sided
## 
## NOTE: n is number in *each* group

Question 1

How many simulations do we need to run?

simSize <- 1000
binconf(x=0.025*simSize, n=simSize, method='all')
##            PointEst     Lower     Upper
## Exact         0.025 0.0162425 0.0366848
## Wilson        0.025 0.0169901 0.0366453
## Asymptotic    0.025 0.0153235 0.0346765

We can use the width of a confidence interval to derive the necessary sample size (simulation size).

\[\begin{align*} \hat{p} \pm 1.96 \frac{\sqrt{\hat{p}(1-\hat{p})}}{\sqrt{B}} \end{align*}\]

If we want the width to be shorter than \(w\), then \[\begin{align*} (2^2)(1.96^2)(0.5^2) &< w^2B\\ 3.8416/w^2 &< B \end{align*}\] for \(w=0.01\), \(B > 38416\).
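
As a quick check of this arithmetic in R (plugging in the conservative value \(\hat{p}=0.5\)):

ceiling( (2^2)*(1.96^2)*(0.5^2) / 0.01^2 )  ## minimum B for a CI width below 0.01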

simSize <- 38416 
binconf(x=0.025*simSize, n=simSize, method='all')
##            PointEst     Lower     Upper
## Exact         0.025 0.0234617 0.0266106
## Wilson        0.025 0.0234856 0.0266094
## Asymptotic    0.025 0.0234388 0.0265612
binconf(x=0.5*simSize, n=simSize, method='all')
##            PointEst    Lower    Upper
## Exact           0.5 0.494987 0.505013
## Wilson          0.5 0.495000 0.505000
## Asymptotic      0.5 0.495000 0.505000

Question 2

What’s the sample size for power = 90%?

sim1bAlt <- sim1a(n1=240, mu1=105, sig1=20, mu2=100, sig2=20, B=1000)

table(sim1bAlt < 0.025)
## 
## FALSE  TRUE 
##   211   789

For this simple situation, we don’t need to run a simulation.

power.t.test(n=NULL, delta=5, sd=20, power=0.90)
## 
##      Two-sample t test power calculation 
## 
##               n = 337.201
##           delta = 5
##              sd = 20
##       sig.level = 0.05
##           power = 0.9
##     alternative = two.sided
## 
## NOTE: n is number in *each* group
sim1cAlt <- sim1a(n1=340, mu1=105, sig1=20, mu2=100, sig2=20, B=1000)

table(sim1cAlt < 0.025)
## 
## FALSE  TRUE 
##    95   905

Question 3

What if we want \(n_1 = 2n_2\)?

N1 <- 340
sim1dAlt <- sim1a(n1=N1, mu1=105, sig1=20, n2=N1/2, mu2=100, sig2=20, B=1000)

table(sim1dAlt < 0.025)
## 
## FALSE  TRUE 
##   224   776
N1 <- 400
sim1eAlt <- sim1a(n1=N1, mu1=105, sig1=20, n2=N1/2, mu2=100, sig2=20, B=1000)

table(sim1eAlt < 0.025)
## 
## FALSE  TRUE 
##   179   821

To maintain the same power, we need the harmonic mean of \(n_1\) and \(n_2\) to be \(340\), because the power depends on \(n_1\) and \(n_2\) only through \(\mathrm{Var}(\overline{x}_1-\overline{x}_2)=\sigma^2\left(\frac{1}{n_1}+\frac{1}{n_2}\right)=2\sigma^2/h\), where \(h\) is the harmonic mean. With \(n_1=2n_2\), \[\begin{align*} \frac{2}{\frac{1}{2n_2}+\frac{1}{n_2}} = 340 \end{align*}\]

harmonic.mean <- function(v) 1/mean(1/v)

harmonic.mean( c(400,200))
## [1] 266.667
hm2 <- function(n2) harmonic.mean(c(2*n2, n2))
  f <- function(n2) hm2(n2)-340
uniroot(f=f, lower=0, upper=1000)
## $root
## [1] 255
## 
## $f.root
## [1] 0
## 
## $iter
## [1] 1
## 
## $init.it
## [1] NA
## 
## $estim.prec
## [1] 745

Thus, \(n_2=255\) and \(n_1=255\times 2=510\).
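
As a rough check without simulation, a small helper based on the normal approximation (pow.approx, introduced here only for illustration; one-sided \(\alpha=0.025\), known \(\sigma\)) should give about the same power for \(n_1=510\), \(n_2=255\) as for 340 per group:

pow.approx <- function(n1, n2, delta=5, sd=20, alpha=0.025)
  pnorm( delta / (sd*sqrt(1/n1 + 1/n2)) - qnorm(1-alpha) )
pow.approx(n1=340, n2=340)
pow.approx(n1=510, n2=255)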

N1 <- 510
sim1fAlt <- sim1a(n1=N1, mu1=105, sig1=20, n2=N1/2, mu2=100, sig2=20, B=1000)

table(sim1fAlt < 0.025)
## 
## FALSE  TRUE 
##   100   900

Sometimes you may want to reproduce a simulation exactly. This is possible by reusing the same random seed.

sim2a <- function(n1, mu1, sig1, n2=n1, mu2, sig2, B, SEED=NULL){
    # n1 and n2 are sample sizes.
    # two-sample t-test, one-sided alternative.
    pv <- NULL
    if(is.null(SEED)) SEED <- sample(1:100000, 1)
    set.seed(SEED)
    for(i in 1:B){  ## or seq_len(B)
    ## Generate data using rnorm().
      x1 <- rnorm(n1,mu1,sig1)
      x2 <- rnorm(n2,mu2,sig2)
    ## Conduct t-test using t.test(... ..., alt='greater').
    ## Retain the p-value.
      out <- t.test(x1,x2, alt='greater')$p.value
    pv <- append(pv, out)
    }
    list(pv, SEED)
}

N1 <- 510
sim2aAlt <- sim2a(n1=N1, mu1=105, sig1=20, n2=N1/2, mu2=100, sig2=20, B=1000, SEED=2525)

table( sim2aAlt[[1]] < 0.025)
## 
## FALSE  TRUE 
##   107   893
sim2bAlt <- sim2a(n1=N1, mu1=105, sig1=20, n2=N1/2, mu2=100, sig2=20, B=1000, SEED=2525)

table( sim2bAlt[[1]] < 0.025)
## 
## FALSE  TRUE 
##   107   893

Practice: Let’s make the following changes to the sim2a function.

  1. Make the data all integers.
  2. Add the Wilcoxon rank sum test so we can compare the powers and investigate when the two tests disagree.

sim3a <- function(n1, mu1, sig1, n2=n1, mu2, sig2, B, SEED=NULL){
    # n1 and n2 are sample sizes.
    # two-sample t-test, one-sided alternative.
  pv.t <- pv.w <- seed <- NULL
  if(is.null(SEED)) SEED <- sample(1:100000, 1)
    set.seed(SEED)
      seed <- sample(1:100000, B, replace=FALSE)
    for(i in 1:B){  ## or seq_len(B)
    ## Generate data using rnorm().
      set.seed(seed[i])
      x1 <- round( rnorm(n1,mu1,sig1) )
      x2 <- round( rnorm(n2,mu2,sig2) )
    ## Conduct t-test using t.test(... ..., alt='greater').
    ## Retain the p-value.
      out.t <- t.test(x1,x2, alt='greater')$p.value
      out.w <- suppressWarnings(wilcox.test(x1,x2, alt='greater')$p.value)
    pv.t <- append(pv.t, out.t)
    pv.w <- append(pv.w, out.w)
    }
    data.frame(pv.t, pv.w, seed)
}

N1 <- 10 
sim3aNul <- sim3a(n1=N1, mu1=100, sig1=20, n2=N1/2, mu2=100, sig2=20, B=1000, SEED=2525)

Let’s compare the p-values of t-test and Wilcoxon rank sum test.

plot(0, xlim=c(0,0.1), ylim=c(0,0.1), xlab='t test', ylab='Wilcoxon test', 
     panel.first=grid(nx=NULL,ny=NULL), type='n')
abline(0,1, col=grey(0.9))
points(sim3aNul$pv.t, sim3aNul$pv.w, pch=19, cex=0.9, col='#77777788')

abline(h=0.025, col=grey(0.6))
abline(v=0.025, col=grey(0.6))

Let’s identify the two data sets that produced very different p-values from the t-test and the Wilcoxon rank sum test.

( check <- which((sim3aNul$pv.t < 0.025 & sim3aNul$pv.w > 0.05) | (sim3aNul$pv.t > 0.05 & sim3aNul$pv.w < 0.025)) )
## [1] 513 915
sim3aNul[check,]
##          pv.t      pv.w  seed
## 513 0.0198693 0.0613689 44925
## 915 0.0566123 0.0158910 25793

Because we know the random seeds that produced these data, we can investigate the data further.

set.seed(sim3aNul$seed[check[1]])
      x1 <- round( rnorm(n=10,mean=100,sd=20) )
      x2 <- round( rnorm(n= 5,mean=100,sd=20) )
pvt1 <- t.test(x1,x2, alt='greater')$p.value
pvw1 <- suppressWarnings( wilcox.test(x1,x2, alt='greater')$p.value )

list(x1=sort(x1), x2=sort(x2), p.val=c(pvt1,pvw1))
## $x1
##  [1]  93  93  95  99 101 104 116 118 118 119
## 
## $x2
## [1]  84  93  95  95 105
## 
## $p.val
## [1] 0.0198693 0.0613689
set.seed(sim3aNul$seed[check[2]])
      y1 <- round( rnorm(n=10,mean=100,sd=20) )
      y2 <- round( rnorm(n= 5,mean=100,sd=20) )
pvt2 <- t.test(y1,y2, alt='greater')$p.value
pvw2 <- suppressWarnings( wilcox.test(y1,y2, alt='greater')$p.value )

list(x1=sort(y1), x2=sort(y2), p.val=c(pvt2,pvw2))
## $x1
##  [1]  93  97  99 105 109 109 110 114 114 126
## 
## $x2
## [1]  47  92  94 100 102
## 
## $p.val
## [1] 0.0566123 0.0158910

2 Randomization

Question: If we use simple randomization with N=20, how large a discrepancy between the two group sizes should we expect?

simple.rand <- function(N,B, SEED=NA){
 if(is.na(SEED)) SEED <- sample(1:100000,1)
 set.seed(SEED)
 dat <- matrix(
   sample(c(-1,1),N*B,rep=TRUE), ncol=N
 )
  abs(apply(dat, 1, sum))
}
 
sr <- simple.rand(N=20, B=10000, SEED=NA)

table(sr)
## sr
##    0    2    4    6    8   10   12   14   16 
## 1761 3218 2379 1473  761  290   90   23    5

Compare the result with \(Binomial(20, 0.5)\): the discrepancy is \(|2X-20|\), where \(X \sim Binomial(20, 0.5)\) is the number of subjects assigned to one of the groups.

table(sr) / length(sr) 
## sr
##      0      2      4      6      8     10     12     14     16 
## 0.1761 0.3218 0.2379 0.1473 0.0761 0.0290 0.0090 0.0023 0.0005
disc <- seq(0,12,2) ## discrepancy 
p0 <- dbinom(10,20,0.5)
p1 <- dbinom(9:4,20,0.5) * 2
data.frame(disc, p=c(p0,p1))
##   disc         p
## 1    0 0.1761971
## 2    2 0.3203583
## 3    4 0.2402687
## 4    6 0.1478577
## 5    8 0.0739288
## 6   10 0.0295715
## 7   12 0.0092411

Question: If we randomize N=200 subjects but force an even split (100/100), what is the biggest discrepancy at any point during the enrollment process?

evenSplit <- function(N=200, B=1000, SEED=NA){
    max.disc <- numeric(B) 
  if(is.na(SEED)) SEED <- sample(1:100000,1)
  set.seed(SEED)
    each.seed <- sample(1:100000, B, rep=FALSE)
  for(i in seq_len(B)){
    set.seed(each.seed[i])
    dat <- sample( rep(c(-1,1),N/2), N, rep=FALSE )
    max.disc[i] <- max(abs(cumsum(dat)))
  } 
  cbind(max.disc, each.seed)
}
  
eS <- evenSplit(N=200, B=1000, SEED=747)

table(eS[,1])
## 
##   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19  20  21  22  23  24 
##   6  29  61  98 112 103  98  94 102  79  56  48  40  21  25  12   3   1   5   4 
##  25  27 
##   2   1
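
The seed used for the trace below (58579) is presumably one of the per-run seeds stored in the second column of eS; for example, the run with the largest maximum discrepancy can be looked up with:

eS[ which.max(eS[,1]), ]  ## max.disc and the seed of that run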
N <- 200
set.seed(58579)
dat <- sample( rep(c(-1,1),N/2), N, rep=FALSE )

plot(cumsum(dat), ylab='Discrepancy', type='l')

Question: If we randomize N=200 subjects and force an even split (100/100), what is the longest run of consecutive assignments to the same group?

## Hint

RunHT <- function(N=200, B=1000, SEED=NA){
    max.runs <- numeric(B) 
  if(is.na(SEED)) SEED <- sample(1:100000,1)
  set.seed(SEED)
    each.seed <- sample(1:100000, B, rep=FALSE)
  for(i in seq_len(B)){
    set.seed(each.seed[i])
    dat <- sample( rep(c(-1,1),N/2), N, rep=FALSE )
    max.runs[i] <- max(rle(dat)$lengths) 
  } 
  cbind(max.runs, each.seed)
}

runs.1 <- RunHT(N=200, B=1000, SEED=113)
par(lend=1)
plot( table(runs.1[,1]), lwd=10, xlab='Longest run', ylab='Frequency', yaxs='i')

An R program for block randomization.

withinStrat <- function(N=400, SEED=NULL){
    ## For each stratum, generate a sequence of length N.
        if(!length(SEED) %in% c(0,2)) stop('SEED must be NULL or of length 2.')
    if(is.null(SEED)) SEED <- sample(1:10000, 2, FALSE)

    set.seed(SEED[1])
    Block.Size <- sample(2*(1:3), N/2, replace=TRUE, prob=c(0.2,0.6,0.2))
            includ <- min(which(cumsum(Block.Size) >= N))
        Block.Size <- Block.Size[1:includ]

    out <- NULL
    set.seed(SEED[2])
    for(i in 1:length(Block.Size)){
        out <- c(out, sample(rep(c('A','B'), Block.Size[i]/2), Block.Size[i], rep=FALSE))
    }
    block.id <- rep(1:length(Block.Size), Block.Size)
    block.size <- rep(Block.Size, Block.Size)

    tab <- data.frame(Block_Id=block.id, Block_Size=block.size, Treatment_Arm=out, Rand_Seq=1:length(out))
    list(tab, SEED)
}
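
For example, a minimal call for one stratum (the seed pair is arbitrary, chosen here only for illustration):

ws <- withinStrat(N=20, SEED=c(123, 456))
head(ws[[1]])   ## block id, block size, treatment arm, randomization sequence
ws[[2]]         ## the seeds that were used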

3 Chi-square test with small cells

It is generally believed that a chi-square test is not appropriate when some cells are small because it is based on large-sample theory. Let’s conduct a simulation study to compare chisq.test with and without Yates’ continuity correction, and fisher.test.

We will generate data under the null hypothesis of no association and compute frequency of rejection.

chiSqSim <- function(n, px, py, B, SEED=NULL){
  ## n is total sample size.
  ## px = P[X1=x1] (marginal probability 1) 
  ## py = P[Y1=y1] (marginal probability 2)
  ## px and py are length 1. 
  ## B is the number of simulations.
  
  ## n <- 100 ; px <- 0.2 ; py <- 0.1 ; B <- 10 ; SEED <- NULL 
      sw <- suppressWarnings
  p4 <- outer(c(px,1-px), c(py,1-py), FUN='*')
    if(is.null(SEED)) SEED <- sample(1:10000,1)
  set.seed(SEED)
  d <- rmultinom(B, size=n, prob=p4)
    three.tests <- function(v){
      m <- cbind(v[1:2],v[3:4])
      p.fish <- fisher.test(m)$p.val
      p.chsq <- sw(chisq.test(m, correct=FALSE))$p.val
      p.chcc <- sw(chisq.test(m, correct=TRUE ))$p.val
      c(p.fish, p.chsq, p.chcc)
    }
    out <- t( apply(d, MARGIN=2, FUN=three.tests) )
    out <- data.frame(out)
    names(out) <- c('Fish','Chsq','Chcc')
    list(d,out)
}
    
css.format <- function(css){
  ## css is output of chiSqSim.
   pva <- css[[2]]
      dat <- css[[1]]
   rej <- pva < 0.05
   typeI <- apply(rej, 2, function(v) sum(v) / length(v))
   typeI
}    

sim4a <- chiSqSim(n=200, px=0.1, py=0.6, B=1000) 
  css.format(sim4a)
##  Fish  Chsq  Chcc 
## 0.033 0.040 0.028
sim4b <- chiSqSim(n=20, px=0.1,  py=0.6, B=1000) 
  css.format(sim4b)
##  Fish  Chsq  Chcc 
## 0.009    NA    NA
sim4c <- chiSqSim(n=20, px=0.05, py=0.6, B=1000) 
  css.format(sim4c)
## Fish Chsq Chcc 
##    0   NA   NA

What are these NA’s?

css.format <- function(css){
  ## css is output of chiSqSim.
   pva <- css[[2]]
      dat <- css[[1]]
   rej <- pva < 0.05
   typeI <- apply(rej, 2, function(v) sum(v, na.rm=TRUE) / length(v))
   Fail  <- apply(rej, 2, function(v) sum(is.na(v)) / length(v))
   list(typeI=typeI, Fail=Fail)
}

css.format(sim4a)
## $typeI
##  Fish  Chsq  Chcc 
## 0.033 0.040 0.028 
## 
## $Fail
## Fish Chsq Chcc 
##    0    0    0
css.format(sim4b)
## $typeI
##  Fish  Chsq  Chcc 
## 0.009 0.027 0.001 
## 
## $Fail
##  Fish  Chsq  Chcc 
## 0.000 0.124 0.124
css.format(sim4c)
## $typeI
## Fish Chsq Chcc 
## 0.00 0.01 0.00 
## 
## $Fail
##  Fish  Chsq  Chcc 
## 0.000 0.377 0.377
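
The failed runs are tables with an empty row or column: the expected counts are then zero, chisq.test returns NaN, and NaN < 0.05 is NA, whereas fisher.test still returns a p-value. With n=20 and px=0.1, the probability that the first row is empty is \(0.9^{20} \approx 0.12\), which matches the observed failure rate of 0.124. A small illustration (a hand-made 2x2 table, not taken from the simulation):

m0 <- matrix(c(0, 12, 0, 8), nrow=2)   ## first row is empty
suppressWarnings( chisq.test(m0, correct=FALSE)$p.value )  ## NaN
fisher.test(m0)$p.value                                    ## still defined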

4 Power and type I error rate of an adaptive design

Consider the following 3-stage adaptive hypothesis testing strategy: Take a random sample of size \(n_1=20\) from Normal(\(\mu\),\(1\)) and test \(H_0: \mu=0\) against \(H_1: \mu>0\) at \(\alpha=0.025\). If \(H_0\) is rejected, the study is finished; if not, take another random sample of size \(n_2=20\) and test again using all \(n_1+n_2\) observations. If \(H_0\) is still not rejected, take another random sample of size \(n_3=20\) and test again using all the data. Use a Z-test.

  • At the end of each stage, use \(z_{0.975}\) as the critical value.

Questions

  1. Is the type I error rate inflated?
  2. A Bonferroni-type calculation gives \(1-(1-0.025)^3 = 0.073\) as the inflated type I error rate. Will the type I error rate of this 3-stage design be bigger or smaller than this (for reasonable \(n_1\), \(n_2\), \(n_3\))?

Programming plans

  1. Avoid “if”.
  2. Make \(n_1\), \(n_2\), \(n_3\) user inputs.
  3. Make the critical values user inputs.
  4. Outputs: p-value, final sample size, and \(\overline{x}\).

adap3stage <- function(n1,n2,n3, mu, sig=1, B, SEED=NULL, alp=0.025){
  ## Three stage design  
  ## Repeat Z > qnorm(1-alp) until H0 is rejected.
   
  # Generate data #
  # NOTE: mu is used both to generate the data and as the null value in the Z statistic,
  # so the simulation is under H0 regardless of mu.
  if(!is.null(SEED)) set.seed(SEED)
  X <- matrix(rnorm(n=(n1+n2+n3)*B, mean=mu, sd=sig), ncol=B)
    X <- round(X, 1)
  
  compute.p.xbar <- function(v,m,s,n1,n2,n3, alp=0.025){ 
    # v is a (n1+n2+n3) by 1 vector of the data
    compute.xz <- function(w,m,s){
      n <- length(w)
      xbar <- mean(w)
      zval <- (xbar-m) / (s/sqrt(n))
      pval <- 1-pnorm(zval)
      c(n,xbar,pval)
    }
    out <- cbind(
    compute.xz(v[1:n1],m,s),
    compute.xz(v[1:(n1+n2)],m,s),
    compute.xz(v[1:(n1+n2+n3)],m,s)
    )
    pick.this <- min(c(which(out[3,] < alp),3))
    out[,pick.this]
  } 

out <- t(apply(X, MARGIN=2, FUN=compute.p.xbar, m=mu, s=sig, n1=n1, n2=n2, n3=n3, alp=alp) )
out <- data.frame(out)
  names(out) <- c('n','xbar','p')
out
}

a1 <- adap3stage(n1=40,n2=20,n3=20, mu=0, sig=1, B=10000)
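
To address Question 1, one way is to look at the proportion of simulated trials whose final p-value is below 0.025 (using the column names returned by adap3stage) and compare it with 0.025 and with the Bonferroni-type bound of 0.073:

mean(a1$p < 0.025)                          ## empirical type I error rate of the 3-stage design
binconf(x=sum(a1$p < 0.025), n=nrow(a1))    ## with a confidence interval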

4.1

We plan a clinical trial to test the effect of a new drug, assuming that the response rate in the control group is 50%.

\[\begin{align*} H_0:& p_t = p_c \\ H_1:& p_t > p_c \end{align*}\]

The type I error rate is 2.5%, and the power is 0.8 when the response rate of the new drug is 80%. The sample size for a simple one-stage design is:

power.prop.test(n=NULL, p1=0.50, p2=0.80, sig.level=0.025, power=0.80, alternative='one.sided')
## 
##      Two-sample comparison of proportions power calculation 
## 
##               n = 38.48
##              p1 = 0.5
##              p2 = 0.8
##       sig.level = 0.025
##           power = 0.8
##     alternative = one.sided
## 
## NOTE: n is number in *each* group

We take 40 per group.

Suppose we look at the data once when the sample size reaches 20 per group and, depending on the result, may increase the sample size up to 60 per group. If the p-value at this interim analysis is below 0.005, we reject the null hypothesis and stop.

A program to compute the z-value:

compute.z <- function(xC, xT, n, CORRECT=FALSE){
  ## One-sided x>y (doesn't matter if CORRECT=FALSE)
  ## p2 > p1 under the alternative.
  ## same as prop.test( c(x1,x2), c(n,n), correct=CORRECT)$stat
  phatT <- xT/n
  phatC <- xC/n
  pp <- (xT+xC)/(n+n)
  correction <- ifelse(CORRECT, (1/2)*(1/n+1/n), 0)
  z <- (phatT-phatC-correction) / sqrt(pp*(1-pp)*(1/n+1/n))
  z[pp %in% c(0,1)] <- 0
  z
}

Compute the p-value for given values of xc and xt.

## n <- 20
xc <- xt <- 0:20

Zvalue <- outer(xc,xt, FUN=compute.z, n=20)
Pvalue <- 1-pnorm(Zvalue)

plot(0, type='n', xlim=c(0,20), ylim=c(0,20), xlab='Control', ylab='Treatment')
title(main='Red = "p.value < 0.005"', font.main=3, adj=0)
for(i in 1:21){
  pv <- Pvalue[i,]  
  cc <- ifelse(pv<0.005, 'red','blue')
  points(rep(i-1,21), 0:20, col=cc, cex=0.4, pch=19)
}

When the null hypothesis is not rejected at Stage 1 (the blue points), we compute the probability of rejecting it at Stage 2, i.e., the conditional power. First, the case where the sample size is not changed (n2=20).

CondPow <- function(n1=20, n2=20, n.add=0, pC=0.50, pT=0.80, xC, xT, alp2, CORRECT=FALSE){
  ## xT and xC are the Stage 1 data.
  ## alp2 is the type I error rate for the second stage (constant)
  
  ## Stage 2
  yT <- seq(0, n2+n.add)
  yC <- seq(0, n2+n.add)
  
  ## Stage 2 p-value
  all.pairs <- outer(xC+yC, xT+yT, FUN=compute.z, n=n1+n2+n.add, CORRECT=CORRECT)
  all.rejec <- all.pairs > qnorm(1-alp2) 
    C.all <- rep(yT,each=length(yC))
    T.all <- rep(yC,length(yT))
  REJ <- data.frame(Con=C.all[t(all.rejec)], Trt=T.all[t(all.rejec)],
                    Z=t(all.pairs)[t(all.rejec)])  ## transposed so Z lines up with Con and Trt
  REJ$joint <- dbinom(REJ$Con, n2+n.add, pC) * dbinom(REJ$Trt, n2+n.add, pT)
  
  sum(REJ$joint)
}

For example, if the Stage 1 data are xC=8 and xT=10, the conditional power is

CondPow(n1=20,n2=20, n.add=0, pC=0.50, pT=0.80, xC=8, xT=10, alp2=0.02)
## [1] 0.311489

Conditional power when the sample size is not changed.

xC <- 0:20
xT <- 0:20

## cpf <- function(xC,xT,n1,n2,n.add,pC,pT,alp2) CondPow(n1,n2,n.add,pC,pT,xC,xT,alp2)
## cp <- outer(xC, xT, FUN=cpf, n1=20,n2=20,n.add=0,pC=0.50,pT=0.80,alp2=0.02)

conp <- outer(xC,xT, '+')
for(i in 0:20){
  for(j in 0:20){
    conp[i+1,j+1] <- CondPow(n1=20,n2=20,n.add=0, pC=0.5,pT=0.8, xC=xC[i+1],xT=xT[j+1], alp2=0.02)
  }
}

plot(0, type='n', xlim=c(0,20), ylim=c(0,20), xlab='Control: Stage 1', ylab='Treatment: Stage 1',
    panel.first=grid(nx=0,ny=0))
title(main='Conditional Power (n2=20)', font.main=3, adj=0)
for(i in 0:20){
  for(j in 0:20){
    text(i,j, round(conp[i+1,j+1]*100), cex=0.8)
  }
}

For example, when Xc=10 and Xt=12, the conditional power is only 37%. If we increase the Stage 2 sample size to its maximum (n2=40), the conditional power becomes as shown in the figure below.

conp <- outer(xC,xT, '+')
for(i in 0:20){
  for(j in 0:20){
    conp[i+1,j+1] <- CondPow(n1=20,n2=20,n.add=20, pC=0.5,pT=0.8, xC=xC[i+1],xT=xT[j+1], alp2=0.02)
  }
}

plot(0, type='n', xlim=c(0,20), ylim=c(0,20), xlab='Control: Stage 1', ylab='Treatment: Stage 1',
    panel.first=grid(nx=0,ny=0))
title(main='Conditional Power (n2=40)', font.main=3, adj=0)
for(i in 0:20){
  for(j in 0:20){
    text(i,j, round(conp[i+1,j+1]*100), cex=0.8)
  }
}

With Xc=10 and Xt=12, the conditional power is now 79%. If instead Xc=10 and Xt=5, the conditional power is only 14% even after increasing the sample size, so stopping early for futility is reasonable. When the conditional power is already high without increasing the sample size, we keep \(n_2=20\).

Finally, we arrive at the following design.

  1. If the Stage 1 p-value is 0.005 or smaller, stop early and reject the null hypothesis.
  2. If the conditional power is below 50% even after increasing the Stage 2 sample size to 40, stop early for futility.
  3. Otherwise, the Stage 2 sample size is at least 20 and is chosen so that the conditional power reaches 80%; if the required size exceeds 40, use 40.

library(ssanv)

compute.n2 <- function(n1=20,n2=20, pC=0.5,pT=0.8, xC,xT, alp2=0.02){
  ## Compute the Stage 2 sample size (via n.add) that gives 80% conditional power.
  f <- function(n.add, n1,n2, pC,pT, xC,xT, alp2){
     round(CondPow(n1,n2, n.add, pC,pT, xC,xT, alp2) - 0.80, 3)
  }
  uniroot.integer(f, lower=0, upper=100,n1=n1,n2=n2,pC=pC,pT=pT,xC=xC,xT=xT, alp2=alp2)$root
}

xC <- 0:20
xT <- 0:20
st1<- data.frame(expand.grid(xC,xT))
  names(st1) <- c('xC','xT')
for(i in 1:nrow(st1)){
  st1$zvalue[i] <- compute.z(st1[i,1], st1[i,2], n=20)
}
  st1$pvalue <- round(1-pnorm(st1$zvalue),4)
  st1$rej1 <- st1$pvalue < 0.005
  st1.con <- st1[ !st1$rej1, ]
    row.names(st1.con) <- NULL

  cond.pow20 <- cond.pow40 <- nFor80 <- rep(NA, nrow(st1.con)) 
for( i in 1:nrow(st1.con)){
  cond.pow20[i] <- CondPow(n1=20,n2=20,n.add=00, pC=0.5,pT=0.8, 
                           xC=st1.con[i,1], xT=st1.con[i,2], alp2=0.02)
  cond.pow40[i] <- CondPow(n1=20,n2=20,n.add=20, pC=0.5,pT=0.8, 
                           xC=st1.con[i,1], xT=st1.con[i,2], alp2=0.02)
  if(cond.pow20[i] < 0.8 & cond.pow40[i] > 0.8){
  nFor80[i] <- compute.n2(n1=20,n2=20, pC=0.5,pT=0.8, 
                          xC=st1.con[i,1], xT=st1.con[i,2], alp2=0.02)
  }
} 
st1.con$cp20 <- round(100*cond.pow20) 
st1.con$cp40 <- round(100*cond.pow40)
st1.con$n2 <- nFor80 + 20
   
st1.con2 <- st1.con[ st1.con$cp40 > 50,]
  row.names(st1.con2) <- NULL
  n2x <- 20 + 20*(st1.con2$cp20 < 80)
  
st1.con2$cp <- ifelse(st1.con2$cp20>=80, st1.con2$cp20, st1.con2$cp40)
st1.con2$cp[ !is.na(st1.con2$n2) ] <- 80
st1.con2$n2[is.na(st1.con2$n2)] <- n2x[is.na(st1.con2$n2)]

plot(0, type='n', xlim=c(0,20), ylim=c(0,20), xlab='Control: Stage 1', ylab='Treatment: Stage 1',
    panel.first=grid(nx=0,ny=0))
title(main='Conditional Power', font.main=3, adj=0)
for(i in 1:nrow(st1.con2)){
  text(st1.con2$xC[i], st1.con2$xT[i], st1.con2$cp[i], cex=0.8)
}

plot(0, type='n', xlim=c(0,20), ylim=c(0,20), xlab='Control: Stage 1', ylab='Treatment: Stage 1',
    panel.first=grid(nx=0,ny=0))
title(main='Stage 2 sample size', font.main=3, adj=0)
for(i in 1:nrow(st1.con2)){
  text(st1.con2$xC[i], st1.con2$xT[i], st1.con2$n2[i], cex=0.8)
}
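
As a rough check of the resulting design, its overall power under \(p_C=0.5\), \(p_T=0.8\) can be assembled from the Stage 1 outcome probabilities and the conditional powers computed above. This is only a sketch: it reuses the st1 and st1.con2 objects, and cp is a rounded percentage, so the result is approximate.

## Probability of each Stage 1 outcome under the alternative
w1 <- dbinom(st1$xC, 20, 0.5) * dbinom(st1$xT, 20, 0.8)
p.stage1 <- sum( w1[st1$rej1] )           ## rejected at Stage 1 (p < 0.005)

## Trials that continue to Stage 2 (futility-stopped outcomes are not in st1.con2)
w2 <- dbinom(st1.con2$xC, 20, 0.5) * dbinom(st1.con2$xT, 20, 0.8)
p.stage2 <- sum( w2 * st1.con2$cp/100 )   ## rejected at Stage 2

p.stage1 + p.stage2                       ## approximate overall power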