############################################################################# ##### T-test vs. linear regression ############################################################################# ##### Do this simulation with them rm(list=ls()) n<-100 beta<-.1 x<-rbinom(n,1,0.5) y<-rnorm(n,beta*x,1) t.test(y~x) mod<-lm(y~x) summary(mod) summary(mod)$coeff[2,4] beta<-0.6 pval.ttest<-pval.lm<-NULL for (i in 1:1000){ x<-rbinom(n,1,0.5) y<-rnorm(n,beta*x,1) pval.ttest[i]<-t.test(y~x)$p.value mod<-lm(y~x) summary(mod) pval.lm[i]<-summary(mod)$coeff[2,4] } cor(pval.ttest,pval.lm) plot(pval.ttest,pval.lm) abline(c(0,1)) mean(pval.ttest<0.05) mean(pval.lm<0.05) ################################################ ##### T-test vs. Wilcoxon rank-sum test ##### With normally distributed data, what is the power of the t-test vs. rank-sum test ##### X~Bin(1,0.5); Y~Normal(0+beta*X,1), n=100, beta=0.5 ##### With log-normally distributed data, what is the power of the t-test vs. rank-sum test ##### Z = exp(Y) rm(list=ls()) n<-100 beta<-.5 x<-rbinom(n, 1, 0.5) ## Y= beta*x + epsilon, where epsilon ~ N(0,1) y<-rnorm(n, beta*x, 1) #ymean = beta*x #y = ymean + rnorm(n, 0, 1) z<-exp(y) t.test(y~x) wilcox.test(y~x) ## if we did not observe y, all we observe are z and x t.test(z~x) wilcox.test(z~x) ## zero beta is the null #beta =0 ## non-zero beta is the alternative beta<-0.5 pval.ttestyeq <- pval.ttesty <- pval.wtesty <-NULL pval.ttestzeq <- pval.ttestz <- pval.wtestz <-NULL for (i in 1:10000){ x<-rbinom(n, 1, 0.5) y<-rnorm(n, beta*x, 1) z<-exp(y) pval.ttestyeq[i]<-t.test(y~x, var.equal=T)$p.value pval.ttesty[i]<-t.test(y~x)$p.value pval.wtesty[i]<-wilcox.test(y~x)$p.value pval.ttestzeq[i]<-t.test(z~x, var.equal=T)$p.value pval.ttestz[i]<-t.test(z~x)$p.value pval.wtestz[i]<-wilcox.test(z~x)$p.value } mean(pval.ttestyeq<0.05) mean(pval.ttesty<0.05) mean(pval.wtesty<0.05) mean(pval.ttestzeq<0.05) mean(pval.ttestz<0.05) mean(pval.wtestz<0.05) ########################################################################## ### Now let's compare the power if we were to dichotomize Z at median (~1.3) beta<-0.5 pval.ttesty<-pval.wtesty<-pval.ttestz<-pval.wtestz<-pval.blogisz<-NULL for (i in 1:1000){ x<-rbinom(n,1,0.5) y<-rnorm(n,beta*x,1) z<-exp(y) z2<-ifelse(z<1.3,0,1) mod2<-glm(z2~x, family="binomial") pval.blogisz[i]<-summary(mod2)$coeff["x",4] pval.ttesty[i]<-t.test(y~x)$p.value pval.wtesty[i]<-wilcox.test(y~x)$p.value pval.ttestz[i]<-t.test(z~x)$p.value pval.wtestz[i]<-wilcox.test(z~x)$p.value } mean(pval.ttesty<0.05) mean(pval.wtesty<0.05) mean(pval.ttestz<0.05) mean(pval.wtestz<0.05) mean(pval.blogisz<0.05) ########################################################################## ### Now let's compare the power if we were to categorize Z at approximate quartiles quantile(z,c(.25,.5,.75)) ### 0.7, 1.1, 2.3 library(MASS) beta<-0.5 pval.ttesty<-pval.wtesty<-pval.ttestz<-pval.wtestz<-pval.blogisz<-pval.ordinalz<-NULL for (i in 1:1000){ x<-rbinom(n,1,0.5) y<-rnorm(n, beta*x, 1) z<-exp(y) z2<-ifelse(z<1.3, 0, 1) z4<-ifelse(z<0.7, 0, ifelse(z<1.1, 1, ifelse(z<2.3, 2, 3))) mod4<-polr(factor(z4) ~ x, method="probit") pval.ordinalz[i]<-2*pnorm(-abs(summary(mod4)$coeff["x","t value"])) mod2<-glm(z2~x, family="binomial") pval.blogisz[i]<-summary(mod2)$coeff["x",4] pval.ttesty[i]<-t.test(y~x)$p.value pval.wtesty[i]<-wilcox.test(y~x)$p.value pval.ttestz[i]<-t.test(z~x)$p.value pval.wtestz[i]<-wilcox.test(z~x)$p.value } mean(pval.ttesty<0.05) mean(pval.wtesty<0.05) mean(pval.ttestz<0.05) mean(pval.wtestz<0.05) mean(pval.blogisz<0.05) mean(pval.ordinalz<0.05) ########################################################################## ### Now let's compare the power if we do not categorize Z but analyze it ### as if it were an ordinal variable ### We could try playing around with different transformations also library(rms) beta<-0 pval.ttesty<-pval.wtesty<-pval.ttestz<-pval.wtestz<-pval.blogisz<-pval.ordinalz<- pval.cpmz<-beta.cpmz<-NULL for (i in 1:1000){ x<-rbinom(n,1,0.5) y<-rnorm(n,beta*x,1) z<-exp(y) z2<-ifelse(z<1.3,0,1) z4<-ifelse(z<0.7,0,ifelse(z<1.1,1,ifelse(z<2.3,2,3))) dd<-datadist(z,x) options(datadist=dd) mod.cpm <- orm(z~x, family="probit") #### could also try with logit family (default) pval.cpmz[i]<-anova(mod.cpm)["x","P"] beta.cpmz[i]<-mod.cpm$coeff["x"] mod4<-polr(factor(z4)~x, method="probit") pval.ordinalz[i]<-2*pnorm(-abs(summary(mod4)$coeff["x","t value"])) mod2<-glm(z2~x, family="binomial") pval.blogisz[i]<-summary(mod2)$coeff["x",4] pval.ttesty[i]<-t.test(y~x)$p.value pval.wtesty[i]<-wilcox.test(y~x)$p.value pval.ttestz[i]<-t.test(z~x)$p.value pval.wtestz[i]<-wilcox.test(z~x)$p.value } mean(pval.ttesty<0.05) mean(pval.wtesty<0.05) mean(pval.ttestz<0.05) mean(pval.wtestz<0.05) mean(pval.blogisz<0.05) mean(pval.ordinalz<0.05) mean(pval.cpmz<0.05) mean(beta.cpmz)