?plot
and help(plot)
will show the help page for the function plot(). For some special functions, you only can access the help page through help(). For example, help("[")
tells you the usage of accessing data through index.
At the end of most help pages, there are examples. You can try those examples line by line to see the effects. Or, you can use example() function to go through them all at once. For example, example(plot)
will go through all the code in the plot() help page. This function often leads to >1 page of plots; to see all of them, you may want to turn on history recording by clicking on the plot window and then go to History -> Recording. Note that history recording eats up memory quickly.
A few commands and packages also have built-in demonstration features. demo()
will list them. Try out demo(graphics)
.
try <- table(rpois(100, lambda=3)) barplot(try) barplot(try, horiz=T) pie(try) dotchart(try)
jitter() is the function that can help with drawing jittering plot.
curve(dnorm(x, 0, 0.5), -5, 5, xlab="", ylab="Density") curve(dnorm(x, 0, 1), -5, 5, add=T, col=2) curve(dnorm(x, 0, 2), -5, 5, add=T, col=3) title(main=expression(paste("Normal distributions: ", N(list(0, sigma^2)))), sub=expression(paste("If ", italic(X), " ~ N(0, 1), then ", sigma*italic(X)+mu, " ~ ", N(list(mu, sigma^2))))) legend(1.5, 0.8, lty=1, col=1:3, bty="n", legend=c(expression(sigma == 0.5), expression(sigma == 1), expression(sigma == 2)))
curve(dt(x, 50), -5, 5, xlab="", ylab="Density") curve(dt(x, 10), -5, 5, add=T, col=2) curve(dt(x, 5), -5, 5, add=T, col=3) curve(dt(x, 1), -5, 5, add=T, col=4) title(main=expression(paste(italic(t), " distributions: ", italic(t)[df]))) mtext(expression(paste(italic(t)[1], " is Cauchy distribution and ", italic(t)[infinity], " ~ N(0, 1)")), side=1, line=2.5, adj=0, at=-6) mtext(expression(paste(E(italic(t)[d]) == 0, ", when d > 1, ", SD(italic(t)[d]) == sqrt(d/(d-2)), ", when d > 2.")), side=1, line=3.8, adj=0, at=-6) legend(1.5, 0.4, lty=1, col=1:4, bty="n", legend=c("df = 50", "df = 10", "df = 5", "df = 1"))
TvsNormal = function(df) { curve(dt(x, df), -5, 5, xlab="", ylab="Density") # Normal distribution with same peak height curve(dnorm(x, 0, 1/dt(0, df)/sqrt(2*pi)), -5, 5, add=T, col=2) # Normal distribution with same variance # (t1 and t2 don't have finite variance) if (df > 2) curve(dnorm(x, 0, sqrt(df/(df-2))), -5, 5, add=T, col=3) title(main=eval(expression(paste("df = ", df)))) legend(1, dt(0, df), lty=1, col=1:3, bty="n", cex=.6, legend=c(do.call("expression", list(bquote(t[.(df)]))), "Normal, same peak height", "Normal, same variance")) } par(mfrow=c(2,2), mar=c(3, 4, 2, 1)+.1) TvsNormal(1) TvsNormal(5) TvsNormal(10) TvsNormal(50)
curve(df(x, 1, 10), 0, 5, xlab="", ylab="Density") curve(df(x, 1, 50), 0, 5, add=T, col=2) curve(df(x, 5, 10), 0, 5, add=T, col=3) curve(df(x, 5, 50), 0, 5, add=T, col=4) title(main=expression(paste(italic(F), " distributions: ", italic(F)[list(v1, v2)])), sub=expression(paste("Note: ", E(italic(F)[list(v1,v2)])==v2/(v2-2), ", when v2 > 2. If ", italic(X), " ~ ", italic(t)[d], ", then ", italic(X)^2, " ~ ", italic(F)[list(1, d)]))) legend(2, 1.5, lty=1, col=1:4, bty="n", legend=c("v1 = 1, v2 = 10", "v1 = 1, v2 = 50", "v1 = 5, v2 = 10", "v1 = 5, v2 = 50"))
curve(dchisq(x, 1), 0, 20, xlab="", ylab="Density") curve(dchisq(x, 2), 0, 20, add=T, col=2) curve(dchisq(x, 3), 0, 20, add=T, col=3) curve(dchisq(x, 5), 0, 20, add=T, col=4) curve(dchisq(x, 10), 0, 20, add=T, col=5) title(main=expression(paste("Chi-squared distributions: ", chi[df]^2))) mtext(expression(paste(chi[d]^2, " ~ Gamma(d/2, 1/2). If ", italic(X), " ~ N(0, 1), then ", italic(X)^2, " ~ ", chi[1]^2)), side=1, line=2.5, adj=0, at=-2) mtext(expression(paste("If ", italic(X), " ~ ", chi[d]^2, ", then ", E(italic(X)) == d, " and ", SD(italic(X)) == sqrt(2*d))), side=1, line=3.8, adj=0, at=-2) legend(5, .8, lty=1, col=1:5, bty="n", legend=c("df = 1", "df = 2", "df = 3", "df = 5", "df = 10"))
curve(dnorm(x), -3, 3, xlab="", ylab="Density", ylim=c(0,.8), lwd=2) curve(dchisq(x, 1), 0, 3, add=T, col=2, lwd=2) segments(c(-1/2, 1/2, 0, 1/4), 0, c(-1/2, 1/2, 0, 1/4), c(rep(dnorm(1/2),2), 10, dchisq(1/4,1)), col=c(1,1,2,2)) segments(c(-sqrt(2), sqrt(2), 2), 0, c(-sqrt(2), sqrt(2), 2), c(rep(dnorm(sqrt(2)),2), dchisq(2,1)), col=c(1,1,2), lty=2) title(main=expression(paste("Compare N(0, 1) and ", chi[1]^2, " distributions")), sub=expression(paste("Note: If ", italic(X), " ~ N(0, 1), then ", italic(X)^2, " ~ ", chi[1]^2))) legend(-3, .8, lty=1, lwd=2, col=1:2, bty="n", legend=c("N(0, 1)", expression(chi[1]^2)))
curve(dexp(x, 5), 0, 2, xlab="", ylab="Density", col=1) curve(dexp(x, 2), 0, 2, add=T, col=2) curve(dexp(x, 1), 0, 2, add=T, col=3) curve(dexp(x, 0.5), 0, 2, add=T, col=4) title(main=expression(paste("Exponential distributions: ", Exp(beta)))) mtext(expression(paste(Exp(beta), " ~ Gamma", (list(1, beta)), ", where ", beta, " is rate")), side=1, line=2.5, adj=0, at=-0.3) mtext(expression(paste("If ", italic(X), " ~ ", Exp(beta), ", then ", E(italic(X)) == SD(italic(X)), " = ", 1/beta, ", and ", a*italic(X), " ~ ", Exp(beta/a))), side=1, line=3.8, adj=0, at=-0.3) legend(1, 5, lty=1, col=1:4, bty="n", legend=c(expression(beta == 5), expression(beta == 2), expression(beta == 1), expression(paste(beta == 0.5, ", same as ", chi[2]^2))))
curve(dgamma(x, 0.5), 0, 10, xlab="", ylab="Density") curve(dgamma(x, 1), 0, 10, add=T, col=2) curve(dgamma(x, 1.5), 0, 10, add=T, col=3) curve(dgamma(x, 5), 0, 10, add=T, col=4) title(main=expression(paste("Gamma distributions: Gamma", (list(alpha, beta))))) mtext(expression(paste("If ", italic(X), " ~ Gamma", (list(alpha, 1)), ", then ", italic(beta*X), " ~ Gamma", (list(alpha, 1/beta)))), side=1, line=2.5, adj=0, at=-1) mtext(expression(paste("If ", italic(X), " ~ Gamma", (list(alpha, beta)), ", then ", E(italic(X)) == alpha/beta, " and ", SD(italic(X)) == sqrt(alpha/beta^2))), side=1, line=3.8, adj=0, at=-1) legend(4, 1.5, lty=1, col=1:4, bty="n", legend=c(expression(list(alpha == 0.5, beta == 1)), expression(list(alpha == 1.0, ~~~beta == 1)), expression(list(alpha == 1.5, beta == 1)), expression(list(alpha == 5.0, ~~~beta == 1))))
m = 2; s = 3 sigma = sqrt(log((s/m)^2+1)); mu = log(m)-.5*sigma^2 curve(dlnorm(x, mu, sigma), 0, 5, col=1, xlab="", ylab="Density") beta = m/s^2; alpha = beta*m curve(dgamma(x, alpha, beta), 0, 5, add=T, lty=2, col=1) m = 2; s = 2 sigma = sqrt(log((s/m)^2+1)); mu = log(m)-.5*sigma^2 curve(dlnorm(x, mu, sigma), 0, 5, add=T, col=2) beta = m/s^2; alpha = beta*m curve(dgamma(x, alpha, beta), 0, 5, add=T, lty=2, col=2) m = 2; s = 1 sigma = sqrt(log((s/m)^2+1)); mu = log(m)-.5*sigma^2 curve(dlnorm(x, mu, sigma), 0, 5, add=T, col=3) beta = m/s^2; alpha = beta*m curve(dgamma(x, alpha, beta), 0, 5, add=T, lty=2, col=3) title(main=c("Compare log-normal and Gamma distributions when", "modeling positive-valued data with mean m and SD s")) mtext(expression(paste("For log-normal, ", sigma == sqrt(ln((s/m)^2+1)), " and ", mu == ln(m) - 0.5*sigma^2)), side=1, line=2.5, adj=0, at=-0.5) mtext(expression(paste("For Gamma, ", alpha == (m/s)^2, " and ", beta == m/s^2)), side=1, line=3.8, adj=0, at=-0.5) legend(2, 0.6, lty=rep(1:2,3), col=rep(1:3,each=2), bty="n", legend=c("Log-normal: m=2, s=3", "Gamma: m=2, s=3", "Log-normal: m=2, s=2", "Gamma: m=2, s=2", "Log-normal: m=2, s=1", "Gamma: m=2, s=1"))
par(mfrow=c(2,2), oma=c(1.5,0,1.5,0), mar=c(3, 4, 1, 1)+.1) curve(dbeta(x, 0.5, 0.5), 0, 1, xlab="", ylab="Density", ylim=c(0,5)) curve(dbeta(x, 0.5, 1), 0, 1, add=T, col=2) curve(dbeta(x, 0.5, 5), 0, 1, add=T, col=3) legend(0.2, 5, lty=1, col=1:3, bty="n", legend=c(expression(list(alpha == 0.5, beta == 0.5)), expression(list(alpha == 0.5, beta == 1)), expression(list(alpha == 0.5, beta == 5)))) curve(dbeta(x, 1, 0.5), 0, 1, xlab="", ylab="Density", ylim=c(0,5)) curve(dbeta(x, 1, 1), 0, 1, add=T, col=2) curve(dbeta(x, 1, 5), 0, 1, add=T, col=3) legend(0.2, 5, lty=1, col=1:3, bty="n", legend=c(expression(list(alpha == 1, beta == 0.5)), expression(list(alpha == 1, beta == 1)), expression(list(alpha == 1, beta == 5)))) curve(dbeta(x, 3, 0.5), 0, 1, xlab="", ylab="Density", ylim=c(0,5)) curve(dbeta(x, 3, 1), 0, 1, add=T, col=2) curve(dbeta(x, 3, 5), 0, 1, add=T, col=3) legend(0.2, 5, lty=1, col=1:3, bty="n", legend=c(expression(list(alpha == 3, beta == 0.5)), expression(list(alpha == 3, beta == 1)), expression(list(alpha == 3, beta == 5)))) curve(dbeta(x, 3, 5), 0, 1, lwd=2, xlab="", ylab="Density", ylim=c(0,8)) curve(dbeta(x, 9, 15), 0, 1, add=T, col=2, lwd=2) curve(dbeta(x, 30, 50), 0, 1, add=T, col=3, lwd=2) curve(dbeta(x, .3, .5), 0, 1, add=T, col=1, lty=2) curve(dbeta(x, .6, 1), 0, 1, add=T, col=2, lty=2) curve(dbeta(x, .9, 1.5), 0, 1, add=T, col=3, lty=2) abline(v=3/8) legend(0.4, 8, lwd=rep(2:1,each=3), lty=rep(1:2,each=3), col=rep(1:3,2), bty="n", legend=c(expression(list(alpha == 3, beta == 5)), expression(list(alpha == 9, beta == 15)), expression(list(alpha == 30, beta == 50)), expression(list(alpha == .3, beta == 0.5)), expression(list(alpha == .6, beta == 1)), expression(list(alpha == .9, beta == 1.5)))) mtext(expression(paste("Beta distributions: Beta", (list(alpha, beta)))), side=3, outer=T) mtext(expression(paste("Note: If ", italic(X), " ~ Beta", (list(alpha, beta)), ", then ", E(italic(X)) == alpha / (alpha + beta), " and ", 1 - italic(X), " ~ Beta", (list(beta, alpha)))), side=1, outer=T)
If X1, ..., Xn are independent and uniformly distributed over the interval (0, 1), then the k-th smallest number X(k) ~ Beta(k, n - k + 1) and the length of the range X(n) - X(1) ~ Beta(n - 1, 2).
drawbinomial1 = function(n) { x = 0:n a1 = dbinom(x, n, .1) a3 = dbinom(x, n, .3) a5 = dbinom(x, n, .5) a7 = dbinom(x, n, .7) a9 = dbinom(x, n, .9) ymax = max(a1, a3, a5, a7, a9) plot(1:n, 1:n, type="n", xlab="x", ylab="Probability", xlim=c(-.2, n+.2), ylim=c(0,ymax)) segments(x-.2, 0, x-.2, a1, lwd=2) segments(x-.1, 0, x-.1, a3, lwd=2, col=2) segments(x, 0, x, a5, lwd=2, col=3) segments(x+.1, 0, x+.1, a7, lwd=2, col=4) segments(x+.2, 0, x+.2, a9, lwd=2, col=5) title(main=eval(expression(paste("Binomial Distributions with n = ", n)))) legend(n/2, ymax, lty=1, col=1:5, bty="n", legend=c("p = 0.1", "p = 0.3", "p = 0.5", "p = 0.7", "p = 0.9")) } drawbinomial1(10)
drawbinomial2 = function(p) { b10 = dbinom(0:10, 10, p) b20 = dbinom(0:20, 20, p) b50 = dbinom(0:50, 50, p) b100 = dbinom(0:100, 100, p) b200 = dbinom(0:200, 200, p) ymax = max(b10, b20, b50, b100, b200) plot(1, 1, type="n", xlab="x/n", ylab="Probability", xlim=c(0, 1), ylim=c(0,ymax)) segments((0:10)/10, 0, (0:10)/10, b10, lwd=2) segments((0:20)/20, 0, (0:20)/20, b20, lwd=2, col=2) segments((0:50)/50, 0, (0:50)/50, b50, lwd=2, col=3) segments((0:100)/100, 0, (0:100)/100, b100, lwd=2, col=4) segments((0:200)/200, 0, (0:200)/200, b200, lwd=2, col=5) title(main=eval(expression(paste("Binomial Distributions with p = ", p)))) xlgd = ifelse(p<.5, .6, 0) legend(xlgd, ymax, lty=1, col=1:5, bty="n", legend=c("n = 10", "n = 20", "n = 50", "n = 100", "n = 200")) } drawbinomial2(.3)
x = 0:10 plot(x, x, type="n", xlab="x", ylab="Probability", xlim=c(-.2, 10+.2), ylim=c(0, 0.61)) segments(x-.2, 0, x-.2, dpois(x, .5), lwd=2) segments(x-.1, 0, x-.1, dpois(x, 1), lwd=2, col=2) segments(x, 0, x, dpois(x, 3), lwd=2, col=3) segments(x+.1, 0, x+.1, dpois(x, 5), lwd=2, col=4) title(main=expression(paste("Poisson Distributions: ", Poisson(lambda))), sub=expression(paste("Note: ", Poisson(lambda), " has mean ", lambda, " and SD ", sqrt(lambda)))) legend(5, 0.6, lty=1, col=1:4, bty="n", legend=c(expression(lambda == 0.5), expression(lambda == 1), expression(lambda == 3), expression(lambda == 5)))
x = 0:20 plot(x, x, type="n", xlab="x", ylab="Probability", xlim=c(-.2, 20+.2), ylim=c(0, 0.2)) segments(x-.2, 0, x-.2, dpois(x, 5), lwd=2) segments(x-.1, 0, x-.1, dbinom(x, 100, .05), lwd=1) segments(x+.1, 0, x+.1, dpois(x, 10), lwd=2, col=2) segments(x+.2, 0, x+.2, dbinom(x, 100, .1), lwd=1, col=2) title(main=expression(paste("Compare ", Poisson(lambda), " and ", Binomial(list(n,p)), " when ", lambda == np, " is small"))) legend(10, 0.2, lty=1, col=c(1,1,2,2), lwd=c(2,1,2,1), bty="n", legend=c("Poisson(5)", "Binomial(100, 0.05)", "Poisson(10)", "Binomial(100, 0.10)"))
x = 0:20 plot(x, x, type="n", xlab="x", ylab="Probability", xlim=c(-.3, 20+.3), ylim=c(0, 0.2)) m = 5 ## Poisson for s2 = m segments(x-.2, 0, x-.2, dpois(x, m), lwd=2) ## negative binomial for s2 > m s2 = 10; r=round(m^2/(s2-m)); p=m/s2 print(c(r, p)) segments(x-.1, 0, x-.1, dnbinom(x, r, p), lwd=1) m = 10 segments(x+.1, 0, x+.1, dpois(x, m), lwd=2, col=2) s2 = 20; r=m^2/(s2-m); p=m/s2 print(c(r, p)) segments(x+.2, 0, x+.2, dnbinom(x, r, p), lwd=1, col=2) title(main=expression(paste("Compare ", Poisson(lambda), " and negative binomial", (list(r,p))))) legend(7, 0.2, lty=1, col=c(1,1,2,2), lwd=c(2,1,2,1), bty="n", legend=c("Poisson(5): m = 5, var = 5", "NBinomial(5, 0.5): m = 5, var = 10", "Poisson(10): m = 10, var = 10", "NBinomial(10, 0.5): m = 10, var = 20"))
par(mfrow=c(1,2), oma=c(4,0,1.5,0), mar=c(2, 4, 2, 1)+.1) revbeta = pi/sqrt(6) cllmedian = (0.577216+log(log(2)))/revbeta curve(dlogis(x, 0, sqrt(3)/pi), -2, 2, xlab="", ylab="Density", ylim=c(0,.5), main="Distributions") curve(dnorm(x), -2, 2, add=T, col=2) curve(revbeta*exp(-exp(x*revbeta-0.577216))*exp(x*revbeta-0.577216), -2, 2, add=T, col=3) legend(-2.7, .5, lty=1, col=1:3, bty="n", legend=c("Logistic", "Standard normal", "Gumbel")) curve(plogis(x, 0, sqrt(3)/pi), -2, 2, xlab="", ylab="Probability", main="Corresponding CDFs") curve(pnorm(x), -2, 2, add=T, col=2) curve(1-exp(-exp(x*revbeta-0.577216)), -2, 2, add=T, col=3) segments(-2, 0.5, cllmedian, 0.5) segments(c(0, cllmedian), 0, c(0, cllmedian), 0.5) legend(-2.5, 1, lty=1, col=1:3, bty="n", legend=c("logit", "probit", "complementary log-log")) mtext("Links functions in regressions for binary outcome", side=3, outer=T, cex=1.5) mtext("Note: All distributions have mean = 0 and SD = 1.", side=1, outer=T, line=0.5, adj=0) mtext(paste("Note: If a link function is correct to model the effect", "of a variable, then the right panel shows the risk as", "a function of the z-score of the variable."), side=1, outer=T, line=1.5, adj=0)
curve(plogis((x-15)/2, 0, sqrt(3)/pi), 10, 20, xlab="", ylab="Probability", main="Logistic Distributions") curve(plogis((x-15)/2, 0, 1), 10, 20, add=T) curve(plogis((x-15)/2, 0, .2), 10, 20, add=T) curve(plogis((x-12)/2, 0, sqrt(3)/pi), 10, 20, add=T) curve(plogis((x-12)/2, 0, 1), 10, 20, add=T) curve(plogis((x-12)/2, 0, .2), 10, 20, add=T) curve(plogis((x-17)/2, 0, sqrt(3)/pi), 10, 20, add=T) curve(plogis((x-17)/2, 0, 1), 10, 20, add=T) curve(plogis((x-17)/2, 0, .2), 10, 20, add=T)The following is a simulation to demostrate the variation in fitting logistic regression. Suppose we have 4N observations; N observations have x = 12, 14, 16, and 18, respectively. Under a "real" logistic risk function, we can calculate the risks for each x value, and generate a binary outcome y using binomial sampling. Then we fit a logistic regression to the generated data and draw the fitted risk curve. We repeat this 10 times. The red curve is the "real" underlying curve.
xx = seq(12,18,2) # xx takes 12, 14, 16, 18 prob = plogis(xx, 15, 2) # "real" risks N = 20 # A total of 4N observations curve(plogis(x, 15, 2), 10, 20, ylim=c(0,1), type="n", ylab="probability") for(i in 1:10) { dd = rbinom(4, N, prob) data = data.frame(x=c(xx,xx), y=rep(1:0,each=4), w=c(dd, N-dd)) cc = glm(y ~ x, family=binomial, data=data, weights=w)$coef curve(plogis(x, -cc[1]/cc[2], 1/cc[2]), 10, 20, add=T) } curve(plogis(x, 15, 2), 10, 20, add=T, col=2, lwd=2)
x <- rnorm(200) hist(x) a <- matrix(,100,2) for(i in 1:100) { y <- sample(x, 10) a[i, ] <- c(mean(y), sqrt(var(y))) } plot(a, main="Variation Due to Sampling (n=10)", xlab="sample mean", ylab="sample standard deviation") abline(h=sqrt(var(x)), v=mean(x))
runif(20, -2, 2) plot(x, 2+x, type="n") abline(2, 1, lwd=2, col=2) for(i in 1:10) { y = 2 + x + rnorm(20, 0, 1) x1 = x + rnorm(20, 0, 1) lines(x1, lm(y~x1)$fitted) }
twobytwo = function(n1, p1, n2, p2, REPL=1000) { pv1 = pv2 = pv3 = pv4 = pv5 = pv6 = pv7 = NULL x1 = rbinom(REPL, n1, p1) x2 = rbinom(REPL, n2, p2) for(i in 1:REPL) { xx1 = x1[i]; xx2 = x2[i] p1 = xx1/n1; p2 = xx2/n2; poverall = (xx1+xx2)/(n1+n2) ## Fisher's exact test pv1[i] = fisher.test(matrix(c(xx1, n1-xx1, xx2, n2-xx2),2))$p.value ## Pearson's chi-squared test with continuity correction pv2[i] = chisq.test(matrix(c(xx1, n1-xx1, xx2, n2-xx2),2), correct=T)$p.value ## Pearson's chi-squared test without continuity correction pv3[i] = chisq.test(matrix(c(xx1, n1-xx1, xx2, n2-xx2),2), correct=F)$p.value ## Test on risk difference, using normal approximation ## It is equivalent to Pearson's chi-squred test. stat4 = abs(p1 - p2)/sqrt(poverall*(1-poverall)*(1/n1 + 1/n2)) pv4[i] = 2 * (1 - pnorm(stat4)) ## Test on risk ratio, using normal approximation stat5 = abs(log(p1/p2))/sqrt(1/xx1 - 1/n1 + 1/xx2 - 1/n2) pv5[i] = 2 * (1 - pnorm(stat5)) ## Test on odds ratio, using normal approximation ## It is equivalent to logistic regression. stat6 = abs(log(p1*(1-p2)/(p2*(1-p1)))) / sqrt(1/xx1+1/(n1-xx1)+1/xx2+1/(n2-xx2)) pv6[i] = 2 * (1 - pnorm(stat6)) ## Likelihood ratio test stat7 = 2*(xx1*log(p1) + (n1-xx1)*log(1-p1) + xx2*log(p2) + (n2-xx2)*log(1-p2) - (xx1+xx2)*log(poverall) - (n1+n2-xx1-xx2)*log(1-poverall)) pv7[i] = 1-pchisq(stat7, 1) } data.frame(pv1=pv1, pv2=pv2, pv3=pv3, pv4=pv4, pv5=pv5, pv6=pv6, pv7=pv7) } aa = twobytwo(100, .1, 100, .1, REPL=10000) plot(aa) plot(aa, xlim=c(0,.05), ylim=c(0,.05)) par(mfrow=c(2,3)) plot(ecdf(aa$pv1), xlim=c(0,.05), ylim=c(0,.05)); abline(0,1) plot(ecdf(aa$pv2), xlim=c(0,.05), ylim=c(0,.05)); abline(0,1) plot(ecdf(aa$pv3), xlim=c(0,.05), ylim=c(0,.05)); abline(0,1) plot(ecdf(aa$pv5), xlim=c(0,.05), ylim=c(0,.05)); abline(0,1) plot(ecdf(aa$pv6), xlim=c(0,.05), ylim=c(0,.05)); abline(0,1) plot(ecdf(aa$pv7), xlim=c(0,.05), ylim=c(0,.05)); abline(0,1)
matched = function(alpha, beta, OR, N, REPL=1000) { pA = alpha/(alpha + beta) fn = function(x) (1-1/(1+OR*x/(1-x)))*dbeta(x, alpha, beta) pB = integrate(fn, 0, 1, subdivisions=1000)$value message("Marginal OR of B vs. A is ", pB*(1-pA)/(pA*(1-pB))) pv1 = pv2 = rd = ornaive = ormh = numeric(REPL) for(i in 1:REPL) { ## generate data consisting of N subjects ppA = rbeta(N, alpha, beta) dataA = rbinom(N, 1, ppA) ppB = 1-1/(1+OR*ppA/(1-ppA)) dataB = rbinom(N, 1, ppB) ## create a vector of four: (A+, B+), (A-, B+), ## (A+, B-), and (A-, B-). data = as.numeric(table(1-dataA, 1-dataB)) ## estimate of risk difference rd[i] = data[2]-data[3] ## McNemar's test of risk difference pv1[i] = 1-pchisq(rd[i]^2/sum(data[2:3]), 1) ## Naive estimate of odds ratio: (pB+ * pA-)/(pB- * pA+) ornaive[i] = sum(data[1:2])*sum(data[c(2,4)]) / (sum(data[3:4])*sum(data[c(1,3)])) ## Mantel-Haenszel's estimate of odds ratio ormh[i] = data[2]/data[3] ## Wald test based on Mantel-Haenszel's estimate of odds ratio pv2[i] = 2*pnorm(-abs(log(ormh[i]))/sqrt(1/data[2]+1/data[3])) } return(data.frame(pv1=pv1, pv2=pv2, rd=rd, ornaive=ornaive, ormh=ormh)) } aa = matched(8, 2, 4, 100) #aa = matched(1, 1, 4, 100) #aa = matched(8, 2, 2, 100) #aa = matched(1, 1, 2, 100) mean(aa$ornaive, na.rm=T); median(aa$ornaive, na.rm=T) mean(aa$ormh, na.rm=T); median(aa$ormh, na.rm=T)The results suggest that the Mantel-Haenszel method gives a good estimate, with median of the estimate near to real value. The mean of the estimate tends to be bigger because the distribution of the estimate is skewed to the right.