#### Simulating Causal inference Stat Med paper rm(list=ls()) set.seed(20140128) n<-2000 u0<-rbinom(n,1,.4) a0<-rbinom(n,1,.5) l1<-rbinom(n,1,.25+.3*a0-.2*u0-.05*a0*u0) a1<-rbinom(n,1,.4+.5*a0-.3*l1-.4*a0*l1) y<-rnorm(n,2.5-.5*a0-.75*a1+.2*a0*a1-u0,.2) mod.bad1<-lm(y~a0+a1+a0*a1) summary(mod.bad1) mod.bad2<-lm(y~a0+a1+a0*a1+l1) summary(mod.bad2) #### G-computation #E(Y00) ey00<-mean(y[a0==0&a1==0&l1==1])*mean(l1[a0==0])+ mean(y[a0==0&a1==0&l1==0])*(1-mean(l1[a0==0])) #E(Y01) ey01<-mean(y[a0==0&a1==1&l1==1])*mean(l1[a0==0])+ mean(y[a0==0&a1==1&l1==0])*(1-mean(l1[a0==0])) #E(Y10) ey10<-mean(y[a0==1&a1==0&l1==1])*mean(l1[a0==1])+ mean(y[a0==1&a1==0&l1==0])*(1-mean(l1[a0==1])) #E(Y11) ey11<-mean(y[a0==1&a1==1&l1==1])*mean(l1[a0==1])+ mean(y[a0==1&a1==1&l1==0])*(1-mean(l1[a0==1])) g.int<-ey00 g.0<-ey10-ey00 g.1<-ey01-ey00 g.01<-ey11-g.int-g.0-g.1 g.int g.0 g.1 g.01 ey00 ey01 ey10 ey11 mean(y[a0==0&a1==0]) ## naive estimates mean(y[a0==1&a1==1]) ##### IPTW prob<-ifelse(l1==0&a0==0&a1==1,mean(a1[l1==0&a0==0])*(1-mean(a0)), ifelse(l1==0&a0==1&a1==1,mean(a1[l1==0&a0==1])*mean(a0), ifelse(l1==1&a0==0&a1==1,mean(a1[l1==1&a0==0])*(1-mean(a0)), ifelse(l1==1&a0==1&a1==1,mean(a1[l1==1&a0==1])*mean(a0), ifelse(l1==0&a0==0&a1==0,(1-mean(a1[l1==0&a0==0]))*(1-mean(a0)), ifelse(l1==0&a0==1&a1==0,(1-mean(a1[l1==0&a0==1]))*mean(a0), ifelse(l1==1&a0==0&a1==0,(1-mean(a1[l1==1&a0==0]))*(1-mean(a0)), ifelse(l1==1&a0==1&a1==0,(1-mean(a1[l1==1&a0==1]))*mean(a0),NA)))))))) w<-1/prob mod.msm<-lm(y~a0+a1+a0*a1,weights=w) summary(mod.msm) num.prob<-ifelse(a0==0&a1==0,(1-mean(a1[a0==0]))*(1-mean(a0)), ifelse(a0==1&a1==0,(1-mean(a1[a0==1]))*mean(a0), ifelse(a0==0&a1==1,mean(a1[a0==0])*(1-mean(a0)), ifelse(a0==1&a1==1,mean(a1[a0==1])*mean(a0),NA)))) sw<-num.prob/prob mod.msm.sw<-lm(y~a0+a1+a0*a1,weights=sw) summary(mod.msm.sw) ####################################### ######## Parametric version n<-200000 u0<-rbinom(n,1,.4) a0<-rnorm(n,0,1) l1<-rbinom(n,1,exp(-1.2+.4*a0-u0)/(1+exp(-1.2+.4*a0-u0))) a1<-rnorm(n,0.2+0.6*a0-l1, 0.75) y<-rnorm(n,0.2 - 0.12*a0 - 0.2*a1 -0.5*u0, 0.93) ########## ### g-computation mod.gf.y<-lm(y~a0+a1+a0*a1+l1+a0*l1+a1*l1+a0*a1*l1) summary(mod.gf.y) mod.gf.l1<-glm(l1~a0+I(a0^2), family="binomial") summary(mod.gf.l1) ### Predicting it at a specific level of treatment assignment (a0=0, a1=0) newdata.y.0<-data.frame(a0=0,a1=0,l1=0) newdata.y.1<-data.frame(a0=0,a1=0,l1=1) newdata.l1<-data.frame(a0=0) ey00.gf<- predict(mod.gf.y,newdata=newdata.y.1)/(1+exp(-predict(mod.gf.l1,newdata=newdata.l1))) + predict(mod.gf.y,newdata=newdata.y.0)*(1-1/(1+exp(-predict(mod.gf.l1,newdata=newdata.l1)))) ey00.gf ### Now creating an MSM using observed distribution of a0 and a1. newdata.y.all.0<-data.frame(a0,a1,l1=0) newdata.y.all.1<-data.frame(a0,a1,l1=1) newdata.l1.all<-data.frame(a0) pred.valus.gf<-predict(mod.gf.y,newdata=newdata.y.all.1)/(1+exp(-predict(mod.gf.l1,newdata=newdata.l1.all))) + predict(mod.gf.y,newdata=newdata.y.all.0)*(1-1/(1+exp(-predict(mod.gf.l1,newdata=newdata.l1.all)))) mod.msm.gf<-lm(pred.valus.gf~a0+a1) summary(mod.msm.gf) ############# ### IPW mod.a1.l<-lm(a1~a0+l1+a0*l1) summary(mod.a1.l)$sigma mod.a1<-lm(a1~a0) summary(mod.a1)$sigma w.den<-dnorm(a1, predict(mod.a1.l), summary(mod.a1.l)$sigma) w.num<-dnorm(a1, predict(mod.a1), summary(mod.a1)$sigma) sw<-w.num/w.den mod.msm.ipw<-lm(y~a0+a1, weight=sw) summary(mod.msm.ipw) ### parameter values are -0.12 and -0.2 ey00.msm.ipw<-mod.msm.ipw$coeff%*%c(1,0,0) #### Comparing IPW vs. g-computation answers. These are never exactly the same (different models), #### but as the sample size gets bigger they become close to each other. ey00.gf ey00.msm.ipw summary(mod.msm.gf) summary(mod.msm.ipw)