rm(list=ls()) d<-read.csv("NADE combined botswana and ccc deidentified datasets JULY 2010.csv") wks<-d$FOLLOW_UP_WEEKS ccc<-ifelse(d$group==2,0,d$group) nade<-ifelse(is.na(d$NADE),0,d$NADE) nade.cv<-d$CV_EVENT nade.renal<-d$RENAL_EVENT nade.liver<-d$LIVER_EVENT nade.malig<-d$MALIG_EVENT age<-d$AGE_AT_FIRST_VISIT male<-ifelse(d$sex==2,0,d$sex) ### One person had a renal and CV NADE. nade<-ifelse(nade.cv==1&nade.renal==1,2,nade) years<-wks*7/365.25 ccc.data<-data.frame(age=age[ccc==1],male=male[ccc==1],years=years[ccc==1], nade=nade[ccc==1],nade.cv=nade.cv[ccc==1],nade.renal=nade.renal[ccc==1], nade.liver=nade.liver[ccc==1],nade.malig=nade.malig[ccc==1]) bot.data<-data.frame(age=age[ccc==0],male=male[ccc==0],years=years[ccc==0], nade=nade[ccc==0],nade.cv=nade.cv[ccc==0],nade.renal=nade.renal[ccc==0], nade.liver=nade.liver[ccc==0],nade.malig=nade.malig[ccc==0]) ### General NADE modbot<-glm(nade~1+offset(log(years)),family="poisson",subset=ccc==0) cbind(round(exp(modbot$coeff)*1000,2), round(exp(modbot$coeff-1.96*summary(modbot)$coeff[2])*1000,2), round(exp(modbot$coeff+1.96*summary(modbot)$coeff[2])*1000,2)) modccc<-glm(nade~1+offset(log(years)),family="poisson",subset=ccc==1) cbind(round(exp(modccc$coeff)*1000,2), round(exp(modccc$coeff-1.96*summary(modccc)$coeff[2])*1000,2), round(exp(modccc$coeff+1.96*summary(modccc)$coeff[2])*1000,2)) modbot.agesex<-glm(nade~age+male+offset(log(years)), family="poisson",subset=ccc==0) summary(modbot.agesex) ### interaction and nonlinear terms didn't really benefit the model #### Standardized rate 1000*sum(exp(predict(modbot.agesex,newdata=ccc.data)))/sum(ccc.data$years) #### Bootstrapping standardized rates ccc.data.b<-data.frame(age.b=age[ccc==1],male.b=male[ccc==1],years.b=years[ccc==1], nade.b=nade[ccc==1],nade.cv.b=nade.cv[ccc==1],nade.renal.b=nade.renal[ccc==1], nade.liver.b=nade.liver[ccc==1],nade.malig.b=nade.malig[ccc==1]) set.seed(104) inc<-NULL for (i in 1:1000) { boot<-sample.int(length(nade),length(nade),replace=TRUE) nade.b<-nade[boot] age.b<-age[boot] male.b<-male[boot] years.b<-years[boot] ccc.b<-ccc[boot] mod.boot<-glm(nade.b~age.b+male.b+offset(log(years.b)), family="poisson", subset=ccc.b==0) inc[i]<-1000*sum(exp(predict(mod.boot,newdata=ccc.data.b)))/sum(ccc.data.b$years.b) } quantile(inc,c(.025,.975)) mod.together<-glm(nade~1+offset(log(years))+age+male+ccc,family="poisson") summary(mod.together) ### Cardiovascular modbot.cv<-glm(nade.cv~1+offset(log(years)),family="poisson",data=bot.data) cbind(round(exp(modbot.cv$coeff)*1000,2), round(exp(modbot.cv$coeff-1.96*summary(modbot.cv)$coeff[2])*1000,2), round(exp(modbot.cv$coeff+1.96*summary(modbot.cv)$coeff[2])*1000,2)) modccc.cv<-glm(nade.cv~1+offset(log(years)),family="poisson",subset=ccc==1) cbind(round(exp(modccc.cv$coeff)*1000,2), round(exp(modccc.cv$coeff-1.96*summary(modccc.cv)$coeff[2])*1000,2), round(exp(modccc.cv$coeff+1.96*summary(modccc.cv)$coeff[2])*1000,2)) modbot.cv.agesex<-glm(nade.cv~age+male+offset(log(years)), family="poisson",subset=ccc==0) summary(modbot.cv.agesex) ### interaction and nonlinear terms didn't really benefit the model #### Standardized Rate 1000*sum(exp(predict(modbot.cv.agesex,newdata=ccc.data)))/sum(ccc.data$years) set.seed(101) inc<-NULL for (i in 1:1000) { # boot<-sample.int(length(nade[ccc==0]),length(nade[ccc==0]),replace=TRUE) boot<-sample.int(length(nade),length(nade),replace=TRUE) nade.cv.b<-nade.cv[boot] age.b<-age[boot] male.b<-male[boot] years.b<-years[boot] ccc.b<-ccc[boot] mod.boot<-glm(nade.cv.b~age.b+male.b+offset(log(years.b)), family="poisson", subset=ccc.b==0) inc[i]<-1000*sum(exp(predict(mod.boot,newdata=ccc.data.b)))/sum(ccc.data.b$years.b) } quantile(inc,c(.025,.975)) mod.together.cv<-glm(nade.cv~1+offset(log(years))+age+male+ccc,family="poisson") summary(mod.together.cv) ### Renal modbot.ren<-glm(nade.renal~1+offset(log(years)),family="poisson",subset=ccc==0) cbind(round(exp(modbot.ren$coeff)*1000,2), round(exp(modbot.ren$coeff-1.96*summary(modbot.ren)$coeff[2])*1000,2), round(exp(modbot.ren$coeff+1.96*summary(modbot.ren)$coeff[2])*1000,2)) modccc.ren<-glm(nade.renal~1+offset(log(years)),family="poisson",subset=ccc==1) cbind(round(exp(modccc.ren$coeff)*1000,2), round(exp(modccc.ren$coeff-1.96*summary(modccc.ren)$coeff[2])*1000,2), round(exp(modccc.ren$coeff+1.96*summary(modccc.ren)$coeff[2])*1000,2)) modbot.ren.agesex<-glm(nade.renal~age+male+offset(log(years)), family="poisson",subset=ccc==0) summary(modbot.ren.agesex) ### interaction and nonlinear terms didn't really benefit the model #### Standardized Rate 1000*sum(exp(predict(modbot.ren.agesex,newdata=ccc.data)))/sum(ccc.data$years) set.seed(102) inc<-NULL for (i in 1:1000) { # boot<-sample.int(length(nade[ccc==0]),length(nade[ccc==0]),replace=TRUE) boot<-sample.int(length(nade),length(nade),replace=TRUE) nade.renal.b<-nade.renal[boot] age.b<-age[boot] male.b<-male[boot] years.b<-years[boot] ccc.b<-ccc[boot] mod.boot<-glm(nade.renal.b~age.b+male.b+offset(log(years.b)), family="poisson", subset=ccc.b==0) inc[i]<-1000*sum(exp(predict(mod.boot,newdata=ccc.data.b)))/sum(ccc.data.b$years.b) } quantile(inc,c(.025,.975)) mod.together.renal<-glm(nade.renal~1+offset(log(years))+age+male+ccc,family="poisson") summary(mod.together.renal) ### Liver modbot.liv<-glm(nade.liver~1+offset(log(years)),family="poisson",subset=ccc==0) cbind(round(exp(modbot.liv$coeff)*1000,2), round(exp(modbot.liv$coeff-1.96*summary(modbot.liv)$coeff[2])*1000,2), round(exp(modbot.liv$coeff+1.96*summary(modbot.liv)$coeff[2])*1000,2)) modccc.liv<-glm(nade.liver~1+offset(log(years)),family="poisson",subset=ccc==1) cbind(round(exp(modccc.liv$coeff)*1000,2), round(exp(modccc.liv$coeff-1.96*summary(modccc.liv)$coeff[2])*1000,2), round(exp(modccc.liv$coeff+1.96*summary(modccc.liv)$coeff[2])*1000,2)) library(Hmisc) ((binconf(sum(nade.liver[ccc==0]),length(nade.liver[ccc==0]))* length(nade.liver[ccc==0]))/sum(years[ccc==0]))*1000 ((binconf(sum(nade.liver[ccc==1]),length(nade.liver[ccc==1]))* length(nade.liver[ccc==1]))/sum(years[ccc==1]))*1000 #### Standardized rate is just zero because there were no events in Botswana mod.together.liver<-glm(nade.liver~1+offset(log(years))+age+male+ccc,family="poisson") summary(mod.together.liver) ### Malignancies modbot.m<-glm(nade.malig~1+offset(log(years)),family="poisson",subset=ccc==0) cbind(round(exp(modbot.m$coeff)*1000,2), round(exp(modbot.m$coeff-1.96*summary(modbot.m)$coeff[2])*1000,2), round(exp(modbot.m$coeff+1.96*summary(modbot.m)$coeff[2])*1000,2)) modccc.m<-glm(nade.malig~1+offset(log(years)),family="poisson",subset=ccc==1) cbind(round(exp(modccc.m$coeff)*1000,2), round(exp(modccc.m$coeff-1.96*summary(modccc.m)$coeff[2])*1000,2), round(exp(modccc.m$coeff+1.96*summary(modccc.m)$coeff[2])*1000,2)) modbot.malig.agesex<-glm(nade.malig~age+male+offset(log(years)), family="poisson",subset=ccc==0) summary(modbot.malig.agesex) ### interaction and nonlinear terms didn't really benefit the model #### Standardized rate 1000*sum(exp(predict(modbot.malig.agesex,newdata=ccc.data)))/sum(ccc.data$years) set.seed(103) inc<-NULL for (i in 1:1000) { # boot<-sample.int(length(nade[ccc==0]),length(nade[ccc==0]),replace=TRUE) boot<-sample.int(length(nade),length(nade),replace=TRUE) nade.malig.b<-nade.malig[boot] age.b<-age[boot] male.b<-male[boot] years.b<-years[boot] ccc.b<-ccc[boot] mod.boot<-glm(nade.malig.b~age.b+male.b+offset(log(years.b)), family="poisson", subset=ccc.b==0) inc[i]<-1000*sum(exp(predict(mod.boot,newdata=ccc.data.b)))/sum(ccc.data.b$years.b) } quantile(inc,c(.025,.975)) mod.together.malig<-glm(nade.malig~1+offset(log(years))+age+male+ccc,family="poisson") summary(mod.together.malig) ### pdf("age-sex-dist.pdf") par(mfrow=c(2,2)) hist(age[ccc==0&male==0],xlim=c(18,82),ylim=c(0,130),main="Females in Botswana") hist(age[ccc==0&male==1],xlim=c(18,82),ylim=c(0,130),main="Males in Botswana") hist(age[ccc==1&male==0],xlim=c(18,82),ylim=c(0,200),main="Females in US") hist(age[ccc==1&male==1],xlim=c(18,82),ylim=c(0,200),main="Males in US") dev.off()