# Difficulty in Estimating Accuracy in Small Samples
library(Hmisc)
# Simulation: how well is a classification accuracy estimated as the
# validation sample grows?
#
# One long stream of correct/incorrect outcomes is drawn with success
# probability P. Each sample size in N uses the first n outcomes of that same
# stream, so the samples are nested. For every size we record the observed
# proportion correct (p) and the Wilson confidence limits (low, hi).
#
# Results left in the workspace: P, N, y, p, low, hi.

P <- 0.8 # true probability of a correct prediction

# Sample sizes from 10 to 10 * 2^13 = 81920, four steps per doubling.
N <- round(10 * 2^seq(0, 13, by = 0.25))

set.seed(3)
y <- rbinom(max(N), 1, P)

# Number of correct predictions among the first n outcomes, for every n in N.
# A single cumulative sum replaces re-summing y[1:n] for each sample size.
s <- cumsum(y)[N]
p <- s / N

# Echo the sample sizes to the console (same output the per-size loop gave).
cat(N, '')

# binconf() accepts vectors of successes and trials and returns one row per
# pair with columns PointEst, Lower, Upper (at its default confidence level).
lim <- binconf(s, N, method = 'wilson')
low <- unname(lim[, 'Lower'])
hi <- unname(lim[, 'Upper'])
# Draw the estimated accuracy against validation sample size (log2 scale),
# with the lower/upper confidence limits and the true accuracy P as
# reference lines, and write the figure to a PNG file.
# Reads N, p, low, hi and P from the workspace.

# Alternative output device, currently disabled:
#pdf('/tmp/validation.pdf')
png(filename = '/tmp/propCorrect.png')

x_log2 <- log2(N)      # x positions: sample size on a log2 scale
band_col <- gray(0.7)  # shared grey for limits and reference line

# Point estimates; axes are suppressed here and drawn by hand below.
plot(
  x = x_log2,
  y = p,
  type = 'b',
  axes = FALSE,
  ylim = range(low, hi),
  main = 'Estimated Accuracy and Its Margin of Error\nWhen True Classification Accuracy is 0.8',
  xlab = 'Number of Patients in Validation Sample',
  ylab = 'Estimated Accuracy of Diagnostic Patterns'
)

# Default y axis; x axis ticked at each doubling and labelled with the
# untransformed sample size.
axis(side = 2)
w <- 10 * 2^(0:13)
axis(side = 1, at = log2(w), labels = w)

# Lower limit first, then upper limit.
for (bound in list(low, hi)) {
  lines(x_log2, bound, col = band_col)
}

# Dashed horizontal line at the true accuracy.
abline(h = P, col = band_col, lty = 2)

dev.off()