##### Analyses for second PCORI paper, building off of final PCORI analysis for original paper ###### rm(list=ls()) ### Read in necessary pacakges ## install.packages("survey", repos="http://R-Forge.R-project.org") -- Need to use version 240 or later as mentioned by Thomas (11/4/2021) library(survey) library(survival) ### Necessary home-made functions ###### CIfunc<-function(beta,se){ cbind(beta,beta - qnorm(.975)*se, beta+qnorm(.975)*se) } #### Logistic regression influence function (changed name, but is Tong's function) ### Tong's binary influence function for binary data inf.fun.logit <- function(fit) { dm <- model.matrix(fit) Ihat <- (t(dm) %*% (dm * fit$fitted.values * (1 - fit$fitted.values))) / nrow(dm) ## influence function infl <- (dm * resid(fit, type = "response")) %*% solve(Ihat) infl } #### Set working directory here. Do not do this later in file ###Bryan's wd setwd("~/Documents/data-files/pcori-analyses/data-3-may2021/") path<-"~/Documents/data-files/pcori-analyses/data-3-may2021/" ###################################################################### #### Read in new data with all covariates and reduce back down to original cohort of 10335. ###################################################################### ###Phase 1 data from Aihua. Need to rename some variables to be consistent with phase 2 and backwards compatible phase1<-readRDS(paste0(path,"phase1data_2021-04-29.rds")) names(phase1) names(phase1)[names(phase1)=="child_studyid"]<-"baby_studyid" names(phase1)[names(phase1)=="mom_studyid"]<-"studyid" names(phase1)[names(phase1)=="child_age_at_obesity"]<-"ttobesity" ### original phase1 data, included Kyunghee's variables phase1.orig<-read.csv("~/Dropbox/pam/pcori-methods-2016/PCORI Healthy Weight/Pam/Phase1.csv") names(phase1.orig) #get rid of varibles duplicated in phase1 phase1.orig<-phase1.orig[,c("baby_studyid","est_wgt_change_mother","est_bmi_preg_mother")] ### subset dataset to original cohort and merge in Kyunghee's variables phase1<-merge(phase1.orig,phase1,by="baby_studyid",all.x=T) #### Now rename/recode all variables so backwards compatible phase1$mat4race<-"Other" phase1$mat4race<-ifelse(phase1$mat_race=="A","Asian",phase1$mat4race) phase1$mat4race<-ifelse(phase1$mat_race=="B","Black",phase1$mat4race) phase1$mat4race<-ifelse(phase1$mat_race=="W","White",phase1$mat4race) phase1$hispanic<-ifelse(phase1$mat_ethnicity =="HL",1,0) phase1$male<-ifelse(phase1$child_sex=="M",1,0) phase1$mat_diabetes<-ifelse(phase1$mat_diabetes=="Yes",1,0) phase1$mat_diabetes3<-with(phase1,ifelse(mat_diabetes==1,"Type 1/2", ifelse(mat_gest_diabetes=="Yes","Gestational","None"))) phase1$cesarean<-ifelse(is.na(phase1$cesarean),0,phase1$cesarean) phase1$cesarean<-ifelse(phase1$cesarean=="Yes",1,0) phase1$mat_smoking<-ifelse(is.na(phase1$mat_smoking),0,phase1$mat_smoking) ### lots of missing, so need to set base level phase1$tobacPregr<-ifelse(phase1$mat_smoking=="Yes",1,0) phase1$mat_depression<-ifelse(is.na(phase1$mat_depression),0,phase1$mat_depression) ### lots of missing phase1$depressionr<-ifelse(phase1$mat_depression=="Yes",1,0) phase1$mat_insurance<-ifelse(is.na(phase1$mat_insurance),"Unknown",phase1$mat_insurance) phase1$insurancer<- ifelse(phase1$mat_insurance=="Private","Private","Public/Other") phase1$obesity<-ifelse(phase1$child_obesity=="Yes",1,0) phase1$estWtChangePerWk<-phase1$est_wgt_change_mother/39 ### avg length of pregnancy from conception (not last menstrual period) phase1$singleton<-ifelse(phase1$singleton=="Yes",1,0) phase1$asthma<-ifelse(phase1$child_asthma=="yes",1,0) phase1$mat_asthma<-ifelse(phase1$mat_asthma=="Yes",1,0) #### Add ph1 suffix so can merge with phase 2 data of same name. for(i in 3:ncol(phase1)) names(phase1)[i]<-paste(names(phase1[i]),"ph1",sep=".") ### Phase 2 data from Aihua phase2<-readRDS(paste0(path,"phase2data_2021-05-11.rds")) sort(names(phase2)) #Create needed variables phase2$male<-ifelse(phase2$child_sex.factor=="Male",1,0) phase2$mat_age_delivery<-as.numeric(phase2$baby_dob - phase2$mom_dob)/365.25 phase2$mat4race<-"Other" phase2$mat4race<-ifelse(phase2$mat_race.factor =="Asian", "Asian",phase2$mat4race) phase2$mat4race<-ifelse(phase2$mat_race.factor =="Black or African American", "Black",phase2$mat4race) phase2$mat4race<-ifelse(phase2$mat_race.factor =="White", "White",phase2$mat4race) phase2$hispanic<-ifelse(phase2$ethnicity.factor =="Hispanic (all)",1,0) phase2$cesarean<-ifelse(phase2$delivery_type.factor=="Vaginal",0,1) phase2$tobacPregr<-ifelse(phase2$tobacco_use_preg.factor=="Yes",1,0) phase2$mat_depression.factor<-ifelse(is.na(phase2$mat_depression.factor),0,phase2$mat_depression.factor) phase2$depressionr<-ifelse(phase2$mat_depression.factor==1,1,0) phase2$mat_diabetes<-ifelse(is.na(phase2$mat_diabetes),0,phase2$mat_diabetes) phase2$gest_diabetes<-with(phase2,ifelse(is.na(diabetes_preg.factor),"Unknown / Not Reported", as.character(diabetes_preg.factor))) phase2$mat_diabetes3<-with(phase2,ifelse(mat_diabetes==1,"Type 1/2", ifelse(gest_diabetes=="Yes","Gestational","None"))) phase2$insurancer<-ifelse(phase2$insurance.factor=="Private insurance","Private","Public/Other") phase2$married<-ifelse(phase2$marital_status.factor=="Married",1,0) phase2$singleton<-ifelse(is.na(phase2$singleton),0,phase2$singleton) phase2$asthma<-ifelse(phase2$child_asthma.factor=="Yes",1,0) phase2$mat_asthma<-ifelse(is.na(phase2$mat_asthma),0,phase2$mat_asthma) phase2$ttasthma<-as.numeric(phase2$child_asthma_date - phase2$baby_dob+1)/365.25 phase2$obesity<-ifelse(phase2$child_obesity=="Yes",1,0) phase2$ttobesity<-phase2$child_age_at_obesity #### Sampling frame variables phase2$in.phase2<-1 ### in validated sample phase2$in.Ophase2<-with(phase2, ifelse(obesity_wave1=="Yes"|obesity_wave2=="Yes"| obesity_wave3=="Yes"|obesity_wave4=="Yes",1,0)) phase2$in.Aphase2<-with(phase2, ifelse(asthma_wave1=="Yes"|asthma_wave2=="Yes",1,0)) ##### Have to get Kyunghee's phase 2 variables wave12<-read.csv(paste0(path,"wave12.csv")) wave12<-wave12[,c("baby_studyid","estWtChangePerWk","est_bmi_gest","egaWk")] momV.w3<-readRDS(paste0(path,"mother_dataset_validated_wave3-12212020.rds")) momV.w4<-readRDS(paste0(path,"mother_dataset_validated_wave4-01282021.rds")) momV.w5<-readRDS(paste0(path,"mother_dataset_validated_asthma_wave1-03242021.rds")) momV.w6<-readRDS(paste0(path,"mother_dataset_validated_asthma_wave2-04292021.rds")) momV<-rbind(momV.w3,momV.w4,momV.w5,momV.w6) myMom<-NULL for(id in unique(momV$studyid)){ tmp<-momV[momV$studyid==id,] myMom<-rbind(myMom,tmp[1,]) } myMom$matAgeDelivery<-as.numeric(myMom$baby_dob-myMom$mother_dob)/365.25 #### I can confirm ega =estimated ega using this calc. ## My Check myMom$myega<-myMom$baby_dob - myMom$imp_gest_date myMom$estWtChangePerWk <-(myMom$est_wt_change/myMom$ega)*7 myMom$egaWk<- (myMom$baby_dob - myMom$imp_gest_date)/7 table(myMom$myega==myMom$ega) myMom<-myMom[,c("baby_studyid","matAgeDelivery","egaWk","est_bmi_gest","est_wt_change","estWtChangePerWk")] dim(myMom) wave3456<-myMom[,c("baby_studyid","estWtChangePerWk","est_bmi_gest","egaWk")] wave1to6<-merge(wave12,wave3456,all=T) phase2<-merge(phase2,wave1to6,by="baby_studyid") ### rename for consistency with phase 1 names(phase2)[names(phase2)=="est_bmi_gest"]<-"est_bmi_preg_mother" ### Merge Phase 1 and Phase 2. set sampling frame variables for those not in phase 2 sample alldata<-merge(phase1,phase2,by="baby_studyid",all.x=T) alldata$in.Aphase2<-ifelse(is.na(alldata$in.Aphase2),0,alldata$in.Aphase2) alldata$in.Ophase2<-ifelse(is.na(alldata$in.Ophase2),0,alldata$in.Ophase2) alldata$in.phase2<-ifelse(is.na(alldata$in.phase2),0,alldata$in.phase2) table(alldata$in.Aphase2) table(alldata$in.Ophase2) dim(alldata) ########Now need to merge in the strata name info. ############### #### Get wave4 strata variables for obesity frame ########### wave4design<-read.csv(paste0(path,"alldataWave4Design.csv")) wave4design$in.Oframe<-1 Wave4Strata<-wave4design[,c("id","wave4Strata","in.Oframe")] names(Wave4Strata)[names(Wave4Strata)=="id"]<-"baby_studyid" alldata<-merge(alldata,Wave4Strata,by="baby_studyid",all=T) #### Get wave6 strata variables for asthma wave6design<-read.csv(paste0(path,"wave2design.csv")) wave6design$in.Aframe<-1 Wave6Strata<-wave6design[,c("id","strata","in.Aframe")] names(Wave6Strata)[names(Wave6Strata)=="strata"]<-"wave6Strata" names(Wave6Strata)[names(Wave6Strata)=="id"]<-"baby_studyid" alldata<-merge(alldata,Wave6Strata,by="baby_studyid",all=T) alldata$in.Aframe<-ifelse(is.na(alldata$in.Aframe),0,1) table(alldata$in.Aframe) table(alldata$in.Oframe) ### Stuff on IOM guidelines ### Phase 1 data first alldata$iom.guide.ph1<- with(alldata, ifelse(est_bmi_preg_mother.ph1<18.5 & est_wgt_change_mother*2.20462 < 28, "inadequate", ifelse(est_bmi_preg_mother.ph1<18.5 & est_wgt_change_mother*2.20462 > 40, "excessive", ifelse(est_bmi_preg_mother.ph1<25 & est_wgt_change_mother*2.20462 < 25, "inadequate", ifelse(est_bmi_preg_mother.ph1<25 & est_wgt_change_mother*2.20462 > 35, "excessive", ifelse(est_bmi_preg_mother.ph1<30 & est_wgt_change_mother*2.20462 < 15, "inadequate", ifelse(est_bmi_preg_mother.ph1<30 & est_wgt_change_mother*2.20462 > 25, "excessive", ifelse(est_bmi_preg_mother.ph1>=30 & est_wgt_change_mother*2.20462 < 11, "inadequate", ifelse(est_bmi_preg_mother.ph1>=30 & est_wgt_change_mother*2.20462 > 20, "excessive", "adequate"))))))))) table(alldata$iom.guide.ph1) surv.iomA<-survfit(Surv(ttobesity.ph1,obesity.ph1)~1, data=alldata, subset=iom.guide.ph1=="adequate") plot(surv.iomA$time,1-surv.iomA$surv) surv.iomI<-survfit(Surv(ttobesity.ph1,obesity.ph1)~1, data=alldata, subset=iom.guide.ph1=="inadequate") plot(surv.iomI$time,1-surv.iomI$surv) surv.iomE<-survfit(Surv(ttobesity.ph1,obesity.ph1)~1, data=alldata, subset=iom.guide.ph1=="excessive") plot(surv.iomE$time,1-surv.iomE$surv) plot(c(2,6),c(0,.27),type="n",xlab="years",ylab="Cumulative Incidence of Obesity") lines(surv.iomA$time,1-surv.iomA$surv,col=2) lines(surv.iomI$time,1-surv.iomI$surv,col=3) lines(surv.iomE$time,1-surv.iomE$surv,col=4) ##### Now phase 2 data alldata$est_wgt_change_mother.ph2<-alldata$estWtChangePerWk*alldata$egaWk alldata$iom.guide<- with(alldata, ifelse(est_bmi_preg_mother<18.5 & est_wgt_change_mother.ph2*2.20462 < 28, "inadequate", ifelse(est_bmi_preg_mother<18.5 & est_wgt_change_mother.ph2*2.20462 > 40, "excessive", ifelse(est_bmi_preg_mother<25 & est_wgt_change_mother.ph2*2.20462 < 25, "inadequate", ifelse(est_bmi_preg_mother<25 & est_wgt_change_mother.ph2*2.20462 > 35, "excessive", ifelse(est_bmi_preg_mother<30 & est_wgt_change_mother.ph2*2.20462 < 15, "inadequate", ifelse(est_bmi_preg_mother<30 & est_wgt_change_mother.ph2*2.20462 > 25, "excessive", ifelse(est_bmi_preg_mother>=30 & est_wgt_change_mother.ph2*2.20462 < 11, "inadequate", ifelse(est_bmi_preg_mother>=30 & est_wgt_change_mother.ph2*2.20462 > 20, "excessive", "adequate"))))))))) table(alldata$iom.guide) ## BRYAN: I am going to remove variables from alldata that we do not end up needing/using anymore. alldata$marital_status.factor<- alldata$mat_race.factor<- alldata$ethnicity.factor<- alldata$marital_status.factor<- alldata$tobacco_use.factor<- alldata$tobacco_use_preg.factor<- alldata$alcohol_use_preg.factor<- alldata$mat_diabetes.factor<- alldata$mat_diabetes_d_2.factor<- alldata$diabetes_preg.factor<- alldata$mat_depression.factor<- alldata$mat_asthma.factor<- alldata$insurance.factor<- alldata$delivery_type.factor<- alldata$singleton.factor<- alldata$child_sex.factor<- alldata$feeding.factor<- alldata$child_asthma.factor<- alldata$abstractor.factor<- alldata$motherchild_record_complete.factor<- alldata$obesity_wave1<- alldata$obesity_wave2<- alldata$obesity_wave3<- alldata$obesity_wave4<- alldata$asthma_wave1<- alldata$asthma_wave2<-NULL ########################################################################### ########################################################################### ##### Analysis ########################################################################### ########################################################################### ########################################################################### ###Obesity Model #1: Phase 1 data only ########################################################################### library(survival) naive<-coxph(Surv(ttobesity.ph1,obesity.ph1,type='right')~estWtChangePerWk.ph1+est_bmi_preg_mother.ph1+ mat_age_delivery.ph1+relevel(as.factor(mat4race.ph1),ref="White")+ hispanic.ph1 + relevel(as.factor(mat_diabetes3.ph1),ref="None") +cesarean.ph1 +male.ph1 + tobacPregr.ph1+depressionr.ph1+insurancer.ph1 + singleton.ph1, data=alldata, robust=TRUE) summary(naive) #### No evidence of non-linearity when I put a quadratic term in model for estWtChangePerWk.ph1 (p=0.88) library(splines) knot.locs<-quantile(alldata$estWtChangePerWk.ph1,c(1/3,2/3)) knotstuff<-ns(alldata$estWtChangePerWk.ph1,knots=knot.locs) alldata$estWtChangePerWk.s1.ph1<-knotstuff[,1] alldata$estWtChangePerWk.s2.ph1<-knotstuff[,2] alldata$estWtChangePerWk.s3.ph1<-knotstuff[,3] naive.splines<-coxph(Surv(ttobesity.ph1,obesity.ph1,type='right')~ estWtChangePerWk.s1.ph1+estWtChangePerWk.s2.ph1+estWtChangePerWk.s3.ph1+ est_bmi_preg_mother.ph1+ mat_age_delivery.ph1+relevel(as.factor(mat4race.ph1),ref="White")+ hispanic.ph1 + relevel(as.factor(mat_diabetes3.ph1),ref="None") +cesarean.ph1 +male.ph1 + tobacPregr.ph1+depressionr.ph1+insurancer.ph1 + singleton.ph1,data=alldata, robust=TRUE) summary(naive.splines) ##### Creating plots to show non-linear relationship. Requires creating a dummy dataset that ##### varies estWtChangePerWk but puts all other variables at their median or mode smode <-function(x){ xtab<-table(x) modes<-xtab[max(xtab)==xtab] mag<-as.numeric(modes[1]) #in case mult. modes, this is safer themodes<-names(modes) mout<-list(themodes=themodes,modeval=mag) return(mout) } ### Extracting what I want from the splines results summary(alldata$estWtChangePerWk.ph1) quantile(alldata$estWtChangePerWk.ph1,c(.01,.99)) WtChangePerWk<-c(0:65)/100 n.length<-length(WtChangePerWk) #### putting knots in the same location for the dummy data knotstuff3<-ns(WtChangePerWk,knots=knot.locs,Boundary.knots=range(alldata$estWtChangePerWk.ph1)) knotstuff.s1<-knotstuff3[,1] knotstuff.s2<-knotstuff3[,2] knotstuff.s3<-knotstuff3[,3] dummydata<-data.frame( estWtChangePerWk.s1.ph1=knotstuff.s1, estWtChangePerWk.s2.ph1=knotstuff.s2, estWtChangePerWk.s3.ph1=knotstuff.s3, est_bmi_preg_mother.ph1=median(alldata$est_bmi_preg_mother.ph1), mat_age_delivery.ph1=median(alldata$mat_age_delivery.ph1), mat4race.ph1=factor(smode(alldata$mat4race.ph1)$themodes,levels=unique(alldata$mat4race.ph1)), hispanic.ph1=round(mean(alldata$hispanic.ph1)), mat_diabetes3.ph1=factor(smode(alldata$mat_diabetes3.ph1)$themodes,levels=unique(alldata$mat_diabetes3.ph1)), cesarean.ph1=round(mean(alldata$hispanic.ph1)), male.ph1=round(mean(alldata$male.ph1)), tobacPregr.ph1=round(mean(alldata$tobacPregr.ph1)), depressionr.ph1=round(mean(alldata$depressionr.ph1)), insurancer.ph1=factor(smode(alldata$insurancer.ph1)$themodes,levels=unique(alldata$insurancer.ph1)), singleton.ph1=round(mean(alldata$singleton.ph1)), ttobesity.ph1=6, obesity.ph1=1 ) est.lp<-predict(object=naive.splines, newdata=dummydata, type="lp", se.fit=TRUE) lp.est<-est.lp$fit lp.low<-est.lp$fit-1.96*est.lp$se.fit lp.up<-est.lp$fit+1.96*est.lp$se.fit plot(c(min(WtChangePerWk),max(WtChangePerWk)),c(min(lp.low),max(lp.up)), type="n", xlab="weight change per week", ylab="log-hazard") lines(WtChangePerWk,lp.est,lwd=2) lines(WtChangePerWk,lp.low) lines(WtChangePerWk,lp.up) est.expected<-predict(object=naive.splines, newdata=dummydata, type="expected", se.fit=TRUE) p.obese<-1-exp(-est.expected$fit) p.low<-1-exp(-(est.expected$fit-1.96*est.expected$se.fit)) p.up<-1-exp(-(est.expected$fit+1.96*est.expected$se.fit)) plot(c(min(WtChangePerWk),max(WtChangePerWk)),c(min(p.low),max(p.up)), type="n", xlab="weight change per week", ylab="Probability of Obesity at 6 years") lines(WtChangePerWk,p.obese,lwd=2) lines(WtChangePerWk,p.low) lines(WtChangePerWk,p.up) ##### Getting influence functions from the naive.splines model. This will be used for phase2 analyses. alldata$naiveInfl1<-resid(naive.splines,type="dfbeta",weighted=FALSE)[,1] ###### Need to specifically write weighted=FALSE alldata$naiveInfl2<-resid(naive.splines,type="dfbeta",weighted=FALSE)[,2] alldata$naiveInfl3<-resid(naive.splines,type="dfbeta",weighted=FALSE)[,3] #### knots in same location in phase2 data knotstuff2<-ns(alldata$estWtChangePerWk,knots=knot.locs, Boundary.knots=range(alldata$estWtChangePerWk.ph1)) alldata$estWtChangePerWk.s1<-knotstuff2[,1] alldata$estWtChangePerWk.s2<-knotstuff2[,2] alldata$estWtChangePerWk.s3<-knotstuff2[,3] alldata$splineinter1<-with(alldata,estWtChangePerWk.s1*est_bmi_preg_mother) alldata$splineinter2<-with(alldata,estWtChangePerWk.s2*est_bmi_preg_mother) alldata$splineinter3<-with(alldata,estWtChangePerWk.s3*est_bmi_preg_mother) ##### Interaction with Gestational diabetes (starting with keeping things linear) table(alldata$mat_diabetes3.ph1) alldata$diabetesG.ph1<-ifelse(alldata$mat_diabetes3.ph1=="Gestational",1,0) alldata$diabetes12.ph1<-ifelse(alldata$mat_diabetes3.ph1=="Type 1/2",1,0) alldata$diabetesG<-ifelse(alldata$mat_diabetes3=="Gestational",1,0) alldata$diabetes12<-ifelse(alldata$mat_diabetes3=="Type 1/2",1,0) naive.inter<-coxph(Surv(ttobesity.ph1,obesity.ph1,type='right')~ estWtChangePerWk.ph1*diabetes12.ph1 + estWtChangePerWk.ph1*diabetesG.ph1 + est_bmi_preg_mother.ph1+ mat_age_delivery.ph1+relevel(as.factor(mat4race.ph1),ref="White")+ hispanic.ph1 + cesarean.ph1 +male.ph1 + tobacPregr.ph1+depressionr.ph1+insurancer.ph1 + singleton.ph1, data=alldata, robust=TRUE) summary(naive.inter) #### No evidence of interaction alldata$naiveInfl.wtchange<-resid(naive.inter,type="dfbeta",weighted=FALSE)[,1] ###### Need to specifically write weighted=FALSE alldata$naiveInfl.diabetes12<-resid(naive.inter,type="dfbeta",weighted=FALSE)[,2] alldata$naiveInfl.diabetesG<-resid(naive.inter,type="dfbeta",weighted=FALSE)[,3] alldata$naiveInfl.wtchange.diabetes12<-resid(naive.inter,type="dfbeta",weighted=FALSE)[,16] alldata$naiveInfl.wtchange.diabetesG<-resid(naive.inter,type="dfbeta",weighted=FALSE)[,17] naive.inter.bmi.s<-coxph(Surv(ttobesity.ph1,obesity.ph1,type='right')~ estWtChangePerWk.s1.ph1*est_bmi_preg_mother.ph1+estWtChangePerWk.s2.ph1*est_bmi_preg_mother.ph1 + estWtChangePerWk.s3.ph1*est_bmi_preg_mother.ph1+ mat_age_delivery.ph1+relevel(as.factor(mat4race.ph1),ref="White")+ hispanic.ph1 + relevel(as.factor(mat_diabetes3.ph1),ref="None") +cesarean.ph1 +male.ph1 + tobacPregr.ph1+depressionr.ph1+insurancer.ph1 + singleton.ph1,data=alldata, robust=TRUE) summary(naive.inter.bmi.s) #### No evidence of interaction alldata$naiveInfl.inter.bmi.s1<-resid(naive.inter.bmi.s,type="dfbeta",weighted=FALSE)[,1] ###### Need to specifically write weighted=FALSE alldata$naiveInfl.inter.bmi.s2<-resid(naive.inter.bmi.s,type="dfbeta",weighted=FALSE)[,2] alldata$naiveInfl.inter.bmi.s3<-resid(naive.inter.bmi.s,type="dfbeta",weighted=FALSE)[,3] alldata$naiveInfl.inter.bmi.s4<-resid(naive.inter.bmi.s,type="dfbeta",weighted=FALSE)[,4] alldata$naiveInfl.inter.bmi.s5<-resid(naive.inter.bmi.s,type="dfbeta",weighted=FALSE)[,18] alldata$naiveInfl.inter.bmi.s6<-resid(naive.inter.bmi.s,type="dfbeta",weighted=FALSE)[,19] alldata$naiveInfl.inter.bmi.s7<-resid(naive.inter.bmi.s,type="dfbeta",weighted=FALSE)[,20] naive.inter.bmi<-coxph(Surv(ttobesity.ph1,obesity.ph1,type='right')~ estWtChangePerWk.ph1*est_bmi_preg_mother.ph1+ mat_age_delivery.ph1+relevel(as.factor(mat4race.ph1),ref="White")+ hispanic.ph1 + relevel(as.factor(mat_diabetes3.ph1),ref="None") +cesarean.ph1 +male.ph1 + tobacPregr.ph1+depressionr.ph1+insurancer.ph1 + singleton.ph1,data=alldata, robust=TRUE) summary(naive.inter.bmi) alldata$naiveInfl.inter.bmi1<-resid(naive.inter.bmi,type="dfbeta",weighted=FALSE)[,1] ###### Need to specifically write weighted=FALSE alldata$naiveInfl.inter.bmi2<-resid(naive.inter.bmi,type="dfbeta",weighted=FALSE)[,2] alldata$naiveInfl.inter.bmi3<-resid(naive.inter.bmi,type="dfbeta",weighted=FALSE)[,16] ########################################################################### ########################################################################### ##### Study design for single frame twophase sampling ########################################################################### ########################################################################### ### IPW designs designO <- twophase(id=list(~1,~1),strata=list(NULL,~wave4Strata),subset=~in.Ophase2, data=alldata,method="approx") designA <- twophase(id=list(~1,~1),strata=list(NULL,~wave6Strata),subset=~in.Aphase2, data=alldata[alldata$in.Aframe==1,],method="approx") ########################################################################### ########################################################################### ##### Set up multi-frame data and weights ########################################################################### ########################################################################### ### Weights Oframe ### correct in.Ophase2 for(s in unique(alldata$wave4Strata)){ Istrat<-alldata$wave4Strata==s Ns<-sum(Istrat) ns<-sum(Istrat & alldata$in.Ophase2==1) alldata$wtO[Istrat]<-Ns/ns alldata$NSO<-Ns } ### Weights Aframe ### correct in.Aphase2 ### Note, wave6Strata is only nonMissing for in.Aframe=1. So ns and Ns will be correct for Aframe subset. for(s in unique(alldata$wave6Strata[!is.na(alldata$wave6Strata)])){ Istrat<-alldata$wave6Strata==s Ns<-sum(Istrat,na.rm=T) ns<-sum(Istrat & alldata$in.Aphase2==1,na.rm=T) alldata$wtA[Istrat]<-Ns/ns alldata$NsA<-Ns } Oframe<-alldata[alldata$in.Oframe==1,] Oframe$MFstrata<-Oframe$wave4Strata Aframe<-alldata[alldata$in.Aframe==1,] Aframe$MFstrata<-Aframe$wave6Strata mfdata<-data.frame(rbind(Oframe,Aframe)) dim(mfdata) dim(mfdata[mfdata$in.Aframe==1,]) ### Multiframe adjustment (Metcalf and Scott SIM 2009, v28; 1512-1523) mfdata$wt.tilde<-ifelse((mfdata$in.Oframe*(1-mfdata$in.Aframe))==1,mfdata$wtO,0) + ifelse((mfdata$in.Aframe*(1-mfdata$in.Oframe))==1,mfdata$wtA,0) + ifelse((mfdata$in.Oframe*mfdata$in.Aframe)==1,(mfdata$wtA*mfdata$wtO)/(mfdata$wtA+mfdata$wtO),0) #### error checking HH weights mfdata$in.Oonly<-ifelse(mfdata$in.Oframe*(1-mfdata$in.Aframe),TRUE,FALSE) mfdata$in.Aonly<-ifelse(mfdata$in.Aframe*(1-mfdata$in.Oframe),TRUE,FALSE) mfdata$in.both<-ifelse((mfdata$in.Oframe*mfdata$in.Aframe)==1,TRUE,FALSE) table(mfdata$wt.tilde[mfdata$in.Oonly]==mfdata$wtO[mfdata$in.Oonly]) table(mfdata$wt.tilde[mfdata$in.Aonly]==mfdata$wtA) ### Nobody in this category table(round(mfdata$wt.tilde[mfdata$in.both],5)==round((1/(1/mfdata$wtA[mfdata$in.both]+1/mfdata$wtO[mfdata$in.both])),5)) designMF <- twophase(id=list(~1,~1),weights=list(~1,~wt.tilde),strata=list(NULL,~MFstrata), data=mfdata,subset=~in.phase2,method="approx") ########################################################################### ### Obesity Model #3: Phase 2 data only with naive raking variable ########################################################################### Ninflcal<-calibrate(designMF,formula=~naiveInfl1+naiveInfl2+naiveInfl3+MFstrata,phase=2,calfun="raking") IPWMR<-svycoxph(Surv(ttobesity,obesity)~estWtChangePerWk.s1 + estWtChangePerWk.s2 + estWtChangePerWk.s3 + est_bmi_preg_mother +mat_age_delivery+ relevel(as.factor(mat4race),ref="White")+ hispanic+relevel(as.factor(mat_diabetes3),ref="None") + cesarean + male+tobacPregr+ depressionr+ insurancer+ singleton + married + number_children + egaWk,design=Ninflcal) summary(IPWMR) exp(IPWMR$coefficients["est_bmi_preg_mother"]*5) exp(5*(IPWMR$coefficients["est_bmi_preg_mother"]+c(-1,1)*1.96*summary(IPWMR)$coefficients["est_bmi_preg_mother","robust se"])) exp(IPWMR$coefficients["mat_age_delivery"]*10) exp(10*(IPWMR$coefficients["mat_age_delivery"]+c(-1,1)*1.96*summary(IPWMR)$coefficients["mat_age_delivery","robust se"])) test1<-regTermTest(IPWMR, ~estWtChangePerWk.s1 + estWtChangePerWk.s2 + estWtChangePerWk.s3, method="LRT") test1 ### Suggested code from Thomas to test non-linearity (11/4/2021) ### Linear term plus all but one of the spline bases IPWMR.1<-svycoxph(Surv(ttobesity,obesity)~estWtChangePerWk + estWtChangePerWk.s1 + estWtChangePerWk.s2 + est_bmi_preg_mother +mat_age_delivery+ relevel(as.factor(mat4race),ref="White")+ hispanic+relevel(as.factor(mat_diabetes3),ref="None") + cesarean + male+tobacPregr+ depressionr+ insurancer+ singleton + married + number_children + egaWk,design=Ninflcal) test<-regTermTest(IPWMR.1, ~estWtChangePerWk.s1+estWtChangePerWk.s2,method="LRT") test ##p=0.0067; there is a non-linear association dummydata2<-data.frame( estWtChangePerWk.s1=knotstuff.s1, estWtChangePerWk.s2=knotstuff.s2, estWtChangePerWk.s3=knotstuff.s3, est_bmi_preg_mother=median(alldata$est_bmi_preg_mother.ph1), mat_age_delivery=median(alldata$mat_age_delivery.ph1), mat4race=factor(smode(alldata$mat4race.ph1)$themodes,levels=unique(alldata$mat4race.ph1)), hispanic=round(mean(alldata$hispanic.ph1)), mat_diabetes3=factor(smode(alldata$mat_diabetes3.ph1)$themodes,levels=unique(alldata$mat_diabetes3.ph1)), cesarean=round(mean(alldata$hispanic.ph1)), male=round(mean(alldata$male.ph1)), tobacPregr=round(mean(alldata$tobacPregr.ph1)), depressionr=round(mean(alldata$depressionr.ph1)), insurancer=factor(smode(alldata$insurancer.ph1)$themodes,levels=unique(alldata$insurancer.ph1)), singleton=round(mean(alldata$singleton.ph1)), married=round(mean(alldata$married,na.rm=TRUE)), number_children=median(alldata$number_children,na.rm=TRUE), egaWk=median(alldata$egaWk,na.rm=TRUE), ttobesity=6, obesity=1 ) s<-predict(IPWMR,se=TRUE, type="curve", newdata=dummydata2) length(s[[1]]$surv) length(s[[1]]$time) conf.est<-matrix(NA,nrow=nrow(dummydata),ncol=2) surv.est<-NULL timepoint<-max(s[[1]]$time) last.one<-length(s[[1]]$surv) for (i in 1:length(dummydata2$obesity)){ surv.est[i]<-s[[i]]$surv[last.one] conf.est[i,]<-confint(s[[i]],parm=timepoint) } inc.est<-1-surv.est conf.i.est<-1-conf.est pdf("Pr-obesity-wt-change.pdf",width=6,height=5) par(mar=c(5,4,1,1)) plot(c(min(WtChangePerWk),max(WtChangePerWk)),c(min(conf.i.est),max(conf.i.est)),type='n', xlab="Estimated Maternal Weight Change (kg/wk)", ylab="Probability of Obesity prior to Age 6 Years") lines(WtChangePerWk, inc.est) lines(WtChangePerWk, conf.i.est[,1], lty=2) lines(WtChangePerWk, conf.i.est[,2], lty=2) dev.off() ### Removing s because it's making my compute run slowly, and I got what I needed! rm(s) ### Plotting the linear predictor est.lp<-predict(object=IPWMR, newdata=dummydata2, type="lp", se.fit=TRUE) lp.est<-est.lp$fit lp.low<-est.lp$fit-1.96*est.lp$se.fit lp.up<-est.lp$fit+1.96*est.lp$se.fit est.risk<-predict(object=IPWMR, newdata=dummydata2, type="risk", se.fit=TRUE) exp(est.lp$fit) est.risk$fit plot(c(min(WtChangePerWk),max(WtChangePerWk)),c(min(lp.low),max(lp.up)), type="n", xlab="weight change per week", ylab="log-hazard") lines(WtChangePerWk,lp.est,lwd=2) lines(WtChangePerWk,lp.low) lines(WtChangePerWk,lp.up) lp.est[WtChangePerWk==0] lp.est[WtChangePerWk==0.2] lp.est[WtChangePerWk==0.4] lp.est[WtChangePerWk==0.6] exp(lp.est[WtChangePerWk==0.6]-lp.est[WtChangePerWk==0.2]) ### Hazard ratios and 95% CI for the manuscript exp(lp.est[WtChangePerWk==0]-lp.est[WtChangePerWk==0.2]) exp(lp.est[WtChangePerWk==0.2]-lp.est[WtChangePerWk==0.2]) exp(lp.est[WtChangePerWk==0.4]-lp.est[WtChangePerWk==0.2]) exp(lp.est[WtChangePerWk==0.6]-lp.est[WtChangePerWk==0.2]) logHR<-c(lp.est[WtChangePerWk==0]-lp.est[WtChangePerWk==0.2], lp.est[WtChangePerWk==0.1]-lp.est[WtChangePerWk==0.2], lp.est[WtChangePerWk==0.3]-lp.est[WtChangePerWk==0.2], lp.est[WtChangePerWk==0.4]-lp.est[WtChangePerWk==0.2], lp.est[WtChangePerWk==0.5]-lp.est[WtChangePerWk==0.2], lp.est[WtChangePerWk==0.6]-lp.est[WtChangePerWk==0.2]) s1Diff<-dummydata2$estWtChangePerWk.s1[c(which(WtChangePerWk==0),which(WtChangePerWk==.1),which(WtChangePerWk==.3), which(WtChangePerWk==.4),which(WtChangePerWk==.5),which(WtChangePerWk==.6))]- dummydata2$estWtChangePerWk.s1[which(WtChangePerWk==.2)] s2Diff<-dummydata2$estWtChangePerWk.s2[c(which(WtChangePerWk==0),which(WtChangePerWk==.1),which(WtChangePerWk==.3), which(WtChangePerWk==.4),which(WtChangePerWk==.5),which(WtChangePerWk==.6))]- dummydata2$estWtChangePerWk.s2[which(WtChangePerWk==.2)] s3Diff<-dummydata2$estWtChangePerWk.s3[c(which(WtChangePerWk==0),which(WtChangePerWk==.1),which(WtChangePerWk==.3), which(WtChangePerWk==.4),which(WtChangePerWk==.5),which(WtChangePerWk==.6))]- dummydata2$estWtChangePerWk.s3[which(WtChangePerWk==.2)] var.logHR<-IPWMR$var[1,1]*s1Diff^2 + IPWMR$var[2,2]*s2Diff^2 + IPWMR$var[3,3]*s3Diff^2 + 2*IPWMR$var[1,2]*s1Diff*s2Diff + 2*IPWMR$var[1,3]*s1Diff*s3Diff + 2*IPWMR$var[2,3]*s2Diff*s3Diff HR.est<-exp(logHR) HR.est.upper<-exp(logHR+1.96*sqrt(var.logHR)) HR.est.lower<-exp(logHR-1.96*sqrt(var.logHR)) cbind(HR.est,HR.est.lower,HR.est.upper) svyquantile(~estWtChangePerWk, design=designMF, c(.5, .25, .33, .75, .15, .96)) inc.est[WtChangePerWk==0.25] conf.i.est[WtChangePerWk==0.25] inc.est[WtChangePerWk==0.28] conf.i.est[WtChangePerWk==0.28] inc.est[WtChangePerWk==0.5] conf.i.est[WtChangePerWk==0.5] ##### Interaction with gestational diabetes Ninflcal.inter<-calibrate(designMF,formula=~naiveInfl.wtchange+naiveInfl.diabetes12 + naiveInfl.diabetesG + naiveInfl.wtchange.diabetes12 + naiveInfl.wtchange.diabetesG + MFstrata,phase=2,calfun="raking") IPWMR.inter<-svycoxph(Surv(ttobesity,obesity)~ estWtChangePerWk*diabetes12 + estWtChangePerWk*diabetesG + est_bmi_preg_mother +mat_age_delivery+ relevel(as.factor(mat4race),ref="White")+ hispanic + cesarean + male+tobacPregr+ depressionr+ insurancer+ singleton + married + number_children + egaWk,design=Ninflcal.inter) summary(IPWMR.inter) ##### Interaction with BMI at conception Ninflcal.inter.bmi.s<-calibrate(designMF,formula=~naiveInfl.inter.bmi.s1 + naiveInfl.inter.bmi.s2 + naiveInfl.inter.bmi.s3 + naiveInfl.inter.bmi.s4 + naiveInfl.inter.bmi.s5 + naiveInfl.inter.bmi.s6 + naiveInfl.inter.bmi.s7 + MFstrata,phase=2,calfun="raking") IPWMR.inter.bmi.s1<-svycoxph(Surv(ttobesity,obesity)~ estWtChangePerWk.s1 + estWtChangePerWk.s2 + estWtChangePerWk.s3 + est_bmi_preg_mother + mat_age_delivery+ relevel(as.factor(mat4race),ref="White")+ hispanic + cesarean + male+tobacPregr+ depressionr+ insurancer+ singleton + married + number_children + egaWk + splineinter1 + splineinter2 + splineinter3, design=Ninflcal.inter.bmi.s) summary(IPWMR.inter.bmi.s1) test3<-regTermTest(IPWMR.inter.bmi.s1, ~splineinter1 + splineinter2 + splineinter3, method="LRT") test3 ##### Interaction is not significant with non-linear weight change WtChangePerWk1<-WtChangePerWk #c(0,.65) #to shorten computation time if needed dummydata3<-data.frame( estWtChangePerWk=rep(WtChangePerWk1,5), est_bmi_preg_mother=c(rep(20,length(WtChangePerWk1)),rep(23,length(WtChangePerWk1)),rep(27,length(WtChangePerWk1)), rep(31,length(WtChangePerWk1)),rep(36,length(WtChangePerWk1))), mat_age_delivery=median(alldata$mat_age_delivery.ph1), mat4race=factor(smode(alldata$mat4race.ph1)$themodes,levels=unique(alldata$mat4race.ph1)), hispanic=round(mean(alldata$hispanic.ph1)), mat_diabetes3=factor(smode(alldata$mat_diabetes3.ph1)$themodes,levels=unique(alldata$mat_diabetes3.ph1)), cesarean=round(mean(alldata$hispanic.ph1)), male=round(mean(alldata$male.ph1)), tobacPregr=round(mean(alldata$tobacPregr.ph1)), depressionr=round(mean(alldata$depressionr.ph1)), insurancer=factor(smode(alldata$insurancer.ph1)$themodes,levels=unique(alldata$insurancer.ph1)), singleton=round(mean(alldata$singleton.ph1)), married=round(mean(alldata$married,na.rm=TRUE)), number_children=median(alldata$number_children,na.rm=TRUE), egaWk=median(alldata$egaWk,na.rm=TRUE), ttobesity=6, obesity=1) ############################### ## Creating data for table ############################### svyquantile(~estWtChangePerWk, design=designMF, c(.5, .25, .75)) svyquantile(~est_bmi_preg_mother, design=designMF, c(.1,.25,.5, .75,.9)) svyquantile(~mat_age_delivery, design=designMF, c(.5, .25, .75)) svyquantile(~egaWk, design=designMF, c(.5, .25, .75)) svyquantile(~number_children, design=designMF, c(.5, .25, .75)) svymean(~number_children, design=designMF) svymean(~male, design=designMF) svymean(~cesarean, design=designMF) svymean(~tobacPregr, design=designMF) svymean(~depressionr, design=designMF) svymean(~insurancer, design=designMF) svymean(~singleton, design=designMF) svymean(~married, design=designMF) svymean(~obesity, design=designMF) .19663+c(-1,1)*1.96*0.0072 # mean(alldata$obesity,na.rm=TRUE) ## naive estimate just using phase2 svymean(~hispanic, design=designMF) svytable(~mat4race, design=designMF)/sum(svytable(~mat4race, design=designMF)) svytable(~mat_diabetes3, design=designMF)/sum(svytable(~mat_diabetes3, design=designMF)) # svymean(~mat_overweight, design=designMF) # svymean(~mat_obese, design=designMF) ##### Cumulative incidence based on weighted KM svytable(~iom.guide, design=designMF)/sum(svytable(~iom.guide, design=designMF)) km.ests <- svykm(Surv(ttobesity,obesity)~iom.guide, design=Ninflcal) pdf("obesity-by-IOM-guidelines.pdf") plot(c(2,6),c(0,.27),type="n",xlab="Child Age in Years",ylab="Cumulative Incidence of Obesity") lines(km.ests[[1]]$time,1-km.ests[[1]]$surv,col=3) lines(km.ests[[2]]$time,1-km.ests[[2]]$surv,col=4) lines(km.ests[[3]]$time,1-km.ests[[3]]$surv,col=2) legend("bottomright", c("adequate","excessive","inadequate"),col=c(3,4,2), lty=c(1,1,1)) dev.off() km.model<-svycoxph(Surv(ttobesity,obesity)~iom.guide,design=Ninflcal) summary(km.model) ########################################################################### #### Reading in trimester weight gain data trimesterData.ph1<-readRDS(paste0(path,"../data-trimesters-03-nov-2021/mother_dataset_trimester_phase1-11022021.rds")) trimesterData.ph2<-readRDS(paste0(path,"../data-trimesters-03-nov-2021/mother_dataset_trimester_phase2-11022021.rds")) head(trimesterData.ph1) ## The only things that vary are measure_date, measure_age, gest_age, wt head(trimesterData.ph2) ## The only things that vary are measure_date, measure_age, gest_age, wt triData.ph1<-trimesterData.ph1[!duplicated(trimesterData.ph1$studyid),] triData.ph2<-trimesterData.ph2[!duplicated(trimesterData.ph2$studyid),] ### Weight change over entire pregnancies match between this and old dataset. triData.ph1[1,] alldata[which(alldata$baby_studyid==202334),] ### Estimated weight changes per trimester add up. head(triData.ph1) head(cbind(with(triData.ph1,est_wt_change_tri1+est_wt_change_tri2+est_wt_change_tri3), triData.ph1$est_wt_change)) with(triData.ph1,summary(est_wt_change_tri1+est_wt_change_tri2+est_wt_change_tri3-est_wt_change)) ### Creating trimester-specific variables for the phase 1 data ### Absolute weight gained in each trimester triData.ph1$est_wt_change_tri1.ph1<-triData.ph1$est_wt_change_tri1 triData.ph1$est_wt_change_tri2.ph1<-triData.ph1$est_wt_change_tri2 triData.ph1$est_wt_change_tri3.ph1<-triData.ph1$est_wt_change_tri3 triData.ph1$est_wt_change_tri23.ph1<-triData.ph1$est_wt_change_tri2+triData.ph1$est_wt_change_tri3 ### Weight gained per week in each trimester, including in trimesters 2 and 3 together triData.ph1$estWtChangePerWkT1.ph1<-with(triData.ph1,est_wt_change_tri1/tri1_days) triData.ph1$estWtChangePerWkT2.ph1<-with(triData.ph1,est_wt_change_tri2/tri2_days) triData.ph1$estWtChangePerWkT3.ph1<-with(triData.ph1,est_wt_change_tri3/tri3_days) triData.ph1$estWtChangePerWkT23.ph1<-with(triData.ph1, (est_wt_change_tri2+est_wt_change_tri3)/(tri2_days+tri3_days)) ### Proportion of total weight gain that happened in 3rd trimester triData.ph1$propWtGainT3.ph1<-with(triData.ph1, est_wt_change_tri3/est_wt_change) ### The proportion weight gained in trimester 3 is less than 0 and greater than 1 in some mothers. ### These values were truncated at 0 and 1 boxplot(triData.ph1$propWtGainT3.ph1) summary(triData.ph1$propWtGainT3.ph1) triData.ph1[triData.ph1$propWtGainT3.ph1<0,] triData.ph1[triData.ph1$propWtGainT3.ph1>1,] triData.ph1$propWtGainT3.trunc.ph1<-with(triData.ph1,ifelse(propWtGainT3.ph1<0,0, ifelse(propWtGainT3.ph1>1,1,propWtGainT3.ph1))) summary(triData.ph1$propWtGainT3.trunc.ph1) boxplot(triData.ph1$propWtGainT3.trunc.ph1) hist(triData.ph1$propWtGainT3.trunc.ph1) ### Creating a simplified phase 1 dataset triData.ph1.simple<-triData.ph1[,c("studyid","estWtChangePerWkT1.ph1","estWtChangePerWkT2.ph1", "estWtChangePerWkT3.ph1","estWtChangePerWkT23.ph1","propWtGainT3.trunc.ph1", "est_wt_change_tri1.ph1","est_wt_change_tri2.ph1","est_wt_change_tri3.ph1","est_wt_change_tri23.ph1")] ### Merging in the simplified phase 1 dataset alldataT<-merge(alldata,triData.ph1.simple,by.x="studyid.ph1",by.y="studyid",all=TRUE) naiveT1p23a<-coxph(Surv(ttobesity.ph1,obesity.ph1,type='right')~est_wt_change_tri1.ph1+ est_wt_change_tri23.ph1+est_bmi_preg_mother.ph1+ mat_age_delivery.ph1+relevel(as.factor(mat4race.ph1),ref="White")+ hispanic.ph1 + relevel(as.factor(mat_diabetes3.ph1),ref="None") +cesarean.ph1 +male.ph1 + tobacPregr.ph1+depressionr.ph1+insurancer.ph1 + singleton.ph1, data=alldataT, robust=TRUE) summary(naiveT1p23a) alldataT$naiveInflT1p23T1<-resid(naiveT1p23a,type="dfbeta",weighted=FALSE)[,1] ###### Need to specifically write weighted=FALSE alldataT$naiveInflT1p23T23<-resid(naiveT1p23a,type="dfbeta",weighted=FALSE)[,2] ###### Now making same phase 2 variables head(triData.ph2) triData.ph2$estWtChangePerWkT1<-with(triData.ph2,est_wt_change_tri1/tri1_days) triData.ph2$estWtChangePerWkT2<-with(triData.ph2,est_wt_change_tri2/tri2_days) triData.ph2$estWtChangePerWkT3<-with(triData.ph2,est_wt_change_tri3/tri3_birth_days) triData.ph2$estWtChangePerWkT23<-with(triData.ph2, (est_wt_change_tri2+est_wt_change_tri3)/(tri2_days+tri3_birth_days)) triData.ph2$est_wt_change_tri23<-with(triData.ph2,est_wt_change_tri2+est_wt_change_tri3) triData.ph2$propWtGainT3<-with(triData.ph2, est_wt_change_tri3/est_wt_change) boxplot(triData.ph2$propWtGainT3) summary(triData.ph2$propWtGainT3) ### The proportion weight gained in trimester 3 is less than 0 and greater than 1 in some mothers. These values were truncated. triData.ph2[triData.ph2$propWtGainT3<0,] triData.ph2[triData.ph2$propWtGainT3>1,] triData.ph2$propWtGainT3.trunc<-with(triData.ph2,ifelse(propWtGainT3<0,0, ifelse(propWtGainT3>1,1,propWtGainT3))) summary(triData.ph2$propWtGainT3.trunc) ### Some women did not have a 3rd trimester (they delivered their baby early), so we exclude these women from ### analyses that look at 3rd trimester weight gain. ### Not sure this is relevant anymore, but I'll keep for now. triData.ph2$include<-with(triData.ph2, ifelse(!is.na(estWtChangePerWkT3),1,0)) ### Still high correlations in phase 2 data with regards to the weight change in different trimesters with(triData.ph2, cor(est_wt_change_tri1,est_wt_change_tri2)) with(triData.ph2, cor(est_wt_change_tri1[include==1],est_wt_change_tri3[include==1])) with(triData.ph2, cor(est_wt_change_tri2[include==1],est_wt_change_tri3[include==1])) with(triData.ph2, cor(est_wt_change_tri1,est_wt_change_tri23)) ### Correlation with weight change per week during 3rd trimester much lower after dividing by the length of third ### trimester -- presumably because of the variation in the length of third trimester with(triData.ph2, cor(estWtChangePerWkT1,estWtChangePerWkT2)) with(triData.ph2, cor(estWtChangePerWkT1[include==1],estWtChangePerWkT3[include==1])) with(triData.ph2, cor(estWtChangePerWkT2[include==1],estWtChangePerWkT3[include==1])) with(triData.ph2, cor(estWtChangePerWkT1[include==1],estWtChangePerWkT23[include==1])) with(triData.ph2, summary(tri3_birth_days[include==1])) triData.ph2.simple<-triData.ph2[,c("studyid","estWtChangePerWkT1","estWtChangePerWkT2", "estWtChangePerWkT3","estWtChangePerWkT23","propWtGainT3.trunc", "est_wt_change_tri1","est_wt_change_tri2","est_wt_change_tri3","est_wt_change_tri23","include")] alldataT<-merge(alldataT,triData.ph2.simple,by.x="studyid.ph1",by.y="studyid",all.x=TRUE) ########################################################################### ########################################################################### ##### Study designs for single frame twophase sampling ########################################################################### ########################################################################### ### IPW designs designO <- twophase(id=list(~1,~1),strata=list(NULL,~wave4Strata),subset=~in.Ophase2, data=alldataT,method="approx") designA <- twophase(id=list(~1,~1),strata=list(NULL,~wave6Strata),subset=~in.Aphase2, data=alldataT[alldataT$in.Aframe==1,],method="approx") ########################################################################### ########################################################################### ##### Set up multi-frame data and weights ########################################################################### ########################################################################### ### Weights Oframe ### correct in.Ophase2 for(s in unique(alldataT$wave4Strata)){ Istrat<-alldataT$wave4Strata==s Ns<-sum(Istrat) ns<-sum(Istrat & alldataT$in.Ophase2==1) alldataT$wtO[Istrat]<-Ns/ns alldataT$NSO<-Ns } ### Weights Aframe ### correct in.Aphase2 ### Note, wave6Strata is only nonMissing for in.Aframe=1. So ns and Ns will be correct for Aframe subset. for(s in unique(alldataT$wave6Strata[!is.na(alldataT$wave6Strata)])){ Istrat<-alldataT$wave6Strata==s Ns<-sum(Istrat,na.rm=T) ns<-sum(Istrat & alldataT$in.Aphase2==1,na.rm=T) alldataT$wtA[Istrat]<-Ns/ns alldataT$NsA<-Ns } Oframe<-alldataT[alldataT$in.Oframe==1,] Oframe$MFstrata<-Oframe$wave4Strata Aframe<-alldataT[alldataT$in.Aframe==1,] Aframe$MFstrata<-Aframe$wave6Strata mfdata<-data.frame(rbind(Oframe,Aframe)) dim(mfdata) dim(mfdata[mfdata$in.Aframe==1,]) ### Multiframe adjustment (Metcalf and Scott SIM 2009, v28; 1512-1523) mfdata$wt.tilde<-ifelse((mfdata$in.Oframe*(1-mfdata$in.Aframe))==1,mfdata$wtO,0) + ifelse((mfdata$in.Aframe*(1-mfdata$in.Oframe))==1,mfdata$wtA,0) + ifelse((mfdata$in.Oframe*mfdata$in.Aframe)==1,(mfdata$wtA*mfdata$wtO)/(mfdata$wtA+mfdata$wtO),0) #### error checking HH weights mfdata$in.Oonly<-ifelse(mfdata$in.Oframe*(1-mfdata$in.Aframe),TRUE,FALSE) mfdata$in.Aonly<-ifelse(mfdata$in.Aframe*(1-mfdata$in.Oframe),TRUE,FALSE) mfdata$in.both<-ifelse((mfdata$in.Oframe*mfdata$in.Aframe)==1,TRUE,FALSE) table(mfdata$wt.tilde[mfdata$in.Oonly]==mfdata$wtO[mfdata$in.Oonly]) table(mfdata$wt.tilde[mfdata$in.Aonly]==mfdata$wtA) ### Nobody in this category table(round(mfdata$wt.tilde[mfdata$in.both],5)==round((1/(1/mfdata$wtA[mfdata$in.both]+1/mfdata$wtO[mfdata$in.both])),5)) designMF <- twophase(id=list(~1,~1),weights=list(~1,~wt.tilde),strata=list(NULL,~MFstrata), data=mfdata,subset=~in.phase2,method="approx") ########################################################################### ### Obesity Models: Phase 2 data with naive raking variables ########################################################################### Ninflcal<-calibrate(designMF,formula=~naiveInflT1p23T1+naiveInflT1p23T23+MFstrata,phase=2,calfun="raking") modT1p23<-svycoxph(Surv(ttobesity,obesity)~est_wt_change_tri1+ est_wt_change_tri23+ est_bmi_preg_mother +mat_age_delivery+ relevel(as.factor(mat4race),ref="White")+ hispanic+relevel(as.factor(mat_diabetes3),ref="None") + cesarean + male+tobacPregr+ depressionr+ insurancer+ singleton + married + number_children + egaWk,design=Ninflcal) summary(modT1p23) svymean(~est_wt_change_tri1, design=designMF) svyquantile(~est_wt_change_tri1, design=designMF, c(.5, .25, .75, .2, .8)) svymean(~est_wt_change_tri2, design=designMF) svyquantile(~est_wt_change_tri2, design=designMF, c(.5, .25, .75)) svymean(~est_wt_change_tri3, design=designMF) svyquantile(~est_wt_change_tri3, design=designMF, c(.5, .25, .75)) svymean(~est_wt_change_tri23, design=designMF) svyquantile(~est_wt_change_tri23, design=designMF, c(.5, .25, .75, .2, .8)) summary(modT1p23) exp(5*(modT1p23$coefficients["est_wt_change_tri23"])) exp(5*(modT1p23$coefficients["est_wt_change_tri23"]+c(-1,1)*1.96*sqrt(modT1p23$var[2,2]))) Ninflcal<-calibrate(designMF,formula=~naiveInflT1p23T1+naiveInflT1p23T23+MFstrata,phase=2,calfun="raking") modT1p23inter<-svycoxph(Surv(ttobesity,obesity)~est_wt_change_tri1*est_wt_change_tri23+ est_bmi_preg_mother +mat_age_delivery+ relevel(as.factor(mat4race),ref="White")+ hispanic+relevel(as.factor(mat_diabetes3),ref="None") + cesarean + male+tobacPregr+ depressionr+ insurancer+ singleton + married + number_children + egaWk,design=Ninflcal) summary(modT1p23inter)