## Proper Inference from Simon's Two-Stage Designs
## user input
## p0 ... P[Success under H0]
## p1 ... P[Success under H1]
## n1 ... Stage 1 sample size
## R1 ... Stage 1 critical value
## nT ... Final planned sample size
## RT ... Final planned critical value
## Convention is to use r1 (= R1 - 1) and rT (= RT - 1)
## --------------------- ##
## Please cite
## Koyama T and Chen H. ``Proper inference from Simon's two-stage designs''
## Statistics in Medicine 27(16), 2008. PMID: 17960777. PMCID: PMC6047527.
## --------------------- ##
TwoStageInf.prob <- function(p0, p1, n1, R1, nT, RT, ROUND=3){
## Continue if X1 >= R1.
## Reject if Xt >= RT.
## Convention is to use r1 = R1-1 and rT = RT-1
n2 <- nT-n1
x1v <- 0:n1
px0 <- dbinom(x1v, n1, p0)
px1 <- dbinom(x1v, n1, p1)
cont <- x1v >= R1
R2 <- pmax(RT - x1v, 0)
cp0 <- replace(1 - pbinom(R2 - 1, n2, p0), x1v < R1, 0)
cp1 <- replace(1 - pbinom(R2 - 1, n2, p1), x1v < R1, 0)
PROB <- data.frame(x1v, px0, px1, cp0, cp1)
## Stage 1 pmf and conditional power
POWE <- round( c( sum(px0 * cp0), sum(px1 * cp1) ), ROUND)
## Unconditional type I erro rate and power
PET <- round( c( pbinom(R1-1, n1, p0), pbinom(R1-1, n1, p1) ), ROUND)
## Probability of early termination
EN <- round( n1 + (nT-n1) * (1-PET), 1)
## Expected sample size
DESIGN <- data.frame(p0=p0, p1=p1, n1=n1, R1=R1, nT=nT, RT=RT)
CHARA <- data.frame(POWE, PET, EN)
row.names(CHARA) <- c('NULL','ALT')
list(DESIGN, CHARA)
}
#############
## p-value ##
#############
pval.st2 <- function(p, n1, R1, nT, RT, m2, x1, x2){
## p is p0 (because this is for p.value) unless computing confidence interval.
## m2 is the actual sample size for stage 2. If there is no change, use m2 = nT-n1.
## Continue if X1 >= R1.
## Convention is to use r1 = R1-1 and rT = RT-1
x1c <- R1:n1
x2c <- RT - x1c - 1
R2 <- RT - x1
d1m <- dbinom(x1c, n1, p)
## pi.star is such that A(x1,n2,pi.star) = conditional p.value
cond.pval <- pbinom(x2-1, m2, p, lower.tail=FALSE)
pi.star <- qbeta(cond.pval, R2, m2-R2+1)
p2m <- pbinom(x2c, m2, pi.star, lower.tail=FALSE)
sum(d1m*p2m)
}
#########################
## Confidence interval ##
#########################
conf.int.st2 <- function(n1, R1, nT, RT, m2, x1, x2, conf.level=0.9, two.sided=TRUE){
alp <- (1 - conf.level) / (1 + as.numeric(two.sided))
f1 <- function(x,a,n2,R1,nT,RT,m2,x1,x2) pval.st2(x, n1, R1, nT, RT, m2, x1, x2) - a
lower.bound <- uniroot(f1, interval=c(0,1), a=alp, n2=n2,R1=R1,nT=nT,RT=RT,m2=m2,x1=x1,x2=x2)$root
median.estimate <- uniroot(f1, interval=c(0,1), a=0.5, n2=n2,R1=R1,nT=nT,RT=RT,m2=m2,x1=x1,x2=x2)$root
upper.bound <- 1
if (two.sided){
upper.bound <- uniroot(f1, interval=c(0,1), a=1-alp, n2=n2,R1=R1,nT=nT,RT=RT,m2=m2,x1=x1,x2=x2)$root
}
out <- list()
out$Conf.int <- c(lower.bound, upper.bound)
out$median.est <- median.estimate
out
}
####################
## Point estimate ##
####################
pxt <- function(p, n1, R1, mT) {
## P[Xt=xt] under p
## xT is 0, 1, ..., mT
xT <- 0:mT
## mT is the actual total sample size.
## xT is the actual total data (combined). Can have multiple elements.
x1c <- R1:n1
d1m <- data.frame( st1=dbinom(x1c, n1, p) )
d2m <- data.frame( st2=dbinom(-outer(x1c,xT, '-'), mT-n1, p) )
px <- apply(mapply('*', d1m,d2m, USE.NAMES=FALSE), 2, sum)
replace(px, xT<R1, dbinom(0:(R1-1),n1,p))
}
## --- ##
## MLE ##
## --- ##
find.phat <- function(n1, R1, mT) {
m2 <- mT - n1
xv <- 0:mT
n <- rep(c(n1, mT), c(R1, mT - R1 + 1))
data.frame(xv, phat=xv / n)
}
Ephat <- function(p, n1, R1, mT){
sum(find.phat(n1,R1,mT)[,2] * pxt(p,n1,R1,mT))
}
## --------- ##
## Whitehead ##
## --------- ##
find.pwh <- function(n1, R1, mT,xT) {
whitehead <- function(n1, R1, mT, xt1) {
temp1 <- find.phat(n1, R1, mT)[xt1 + 1, 2]
f1 <- function(p) Ephat(p, n1, R1, mT) - temp1
uniroot(f1, interval=c(0,1))$root
}
m2 <- mT - n1
xv0 <- 1:(mT - 1)
p.wh <- mapply(function(x) whitehead(n1, R1, mT, x), xv0)
data.frame(xv=c(0, xv0, mT), wh=c(0, p.wh, 1))
}
Ep.wh <- function(p, n1, R1, mT) {
p.wh <- find.pwh(n1, R1, mT)
sum(p.wh[,2] * pxt(p,n1,R1,mT))
}
## -------------------- ##
## Putting all together ##
## -------------------- ##
TwoStageInf.all <- function(p0, p1, n1, R1, nT, RT, x1, m2, x2, ROUND=3, ...) {
result <- list()
param <-
TwoStageInf.prob(p0, p1, n1, R1, nT, RT, ROUND)
result$parameter <- param
1
result$character <- param
2
result$data <- data.frame(x1=x1, n2=nT-n1, m2=m2, x2=x2)
if (x1 < R1)
{
result$phat <- x1/n1
result$p.value <- 1-pbinom(x1-1, n1,p)
}
else
{
result$p.value <- pval.st2(p0,n1,R1,nT,RT,m2,x1,x2)
conf <- conf.int.st2(n1,R1,nT,RT,m2,x1,x2, ...)
result$conf <- conf
1
result$med.esti <- conf
2
}
# ROWid <- ifelse(x1<R1, x1+1, x1+x2+1)
# result$phat <- find.phat(n1,R1,mT)[ROWid,2]
# result$pwh <- find.pwh(n1,R1,mT)[ROWid,2]
result
}
## --- ##
## END ##
## --- ##
## EXAMPLE in Koyama and Chen
## End of section 4.
TwoStageInf.prob(p0=0.3, p1=0.5, n1=19, R1=7, nT=39, RT=17)
## no sample size change.
pval.st2(p=0.3, n1=19, R1=7, nT=39, RT=17, m2=20, x1=7, x2=10)
## With sample size change n2=20, m2=23
TwoStageInf.all(p0=0.3,p1=0.5,n1=19,R1=7,nT=39,RT=17,x1=7,m2=23,x2=10)