##### Bootstrap of the NA-ACCORD and ART-CC approaches rm(list=ls()) setwd("/Volumes/encrypted/cnics/analyses-2013-Nov") library(survival) ################## FUNCTIONS NECESSARY FOR DOING THE COLE APPROACH ################################# ########## Simulating generalized gamma data qggamma <- function(beta, sigma, lambda, p) { s<-lambda^(-2) r<-s*exp((-beta*lambda)/sigma) b<-lambda/sigma quan<-(lambda>0)*((qgamma(p, shape=s, rate=r))^(1/b)) quan<- quan+(lambda<0)*((qgamma(1-p,shape=s, rate=r))^(1/b)) quan } ###### Parameters received from Sterne, May, and Cole par.ests.d<- matrix(c(-0.01976,1.4140,0.8073,-0.3376,0.9424,0.9799,0.004661,450, 0.1292,1.4136,0.8473,0.1036,1.2702,0.8694,0.006369,425, 0.1378,1.4581,0.7093,-0.1678,1.2230,0.7157,0.006523,400, 0.1985,1.4208,0.7989,-0.1855,1.2914,0.7565,0.006381,375, 0.2142,1.3600,0.8139,0.3155,1.4237,0.6343,0.007307,350, 0.2717,1.2890,0.9180,0.005815,0.9229,0.6943,0.007987,325, 0.2535,1.2752,0.8687,-0.2549,1.2515,-0.09287,0.01010,300, 0.3532,1.2246,1.0406,-0.05621,1.2546,0.4186,0.01115,275, 0.3387,1.2135,0.9881,-0.1256,1.6811,-0.08963,0.01367,250, 0.3131,1.2104,0.9577,-0.2491,1.7802,-0.08660,0.01483,225, 0.3287,1.1857,0.9952,0.1319,1.4342,0.7271,0.01549,200, 0.2843,1.1348,0.9281,0.4699,0.7067,1.9816,0.01810,175, 0.3027,1.0971,0.9425,0.4677,0.6664,1.8550,0.02228,150, 0.2842,1.0680,0.9363,0.4622,0.7501,1.6866,0.02700,125, 0.2362,1.0575,0.8927,0.2233,1.1631,0.7395,0.03686,100),ncol=8,byrow=TRUE) ###### mu1, sig1, del1, mu2, sig2, del2, pi, threshold par.ests.e<-matrix(c(-0.03244,1.4193,0.7969,0.4088,1.0706,1.2097,0.01610,450, 0.1132,1.4260,0.8284,0.5776,0.9573,1.4753,0.02273,425, 0.1211,1.4686,0.6908,0.5509,1.0282,1.3532,0.02691,400, 0.1938,1.4293,0.7916,0.1801,1.1994,0.8842,0.02830,375, 0.2147,1.3643,0.8099,0.2086,2.3632,0.8688,0.03197,350, 0.2830,1.2848,0.9275,0.002276,1.1950,0.8346,0.03249,325, 0.2590,1.2710,0.8762,0.07611,1.3065,0.6431,0.03828,300, 0.3509,1.2137,1.0532,0.3388,1.4096,0.7650,0.05289,275, 0.3211,1.2222,0.9638,0.5177,1.2067,1.1048,0.06128,250, 0.2951,1.2273,0.9177,0.4338,1.1252,1.2341,0.06974,225, 0.3231,1.1839,0.9771,0.3187,1.2826,1.0666,0.08119,200, 0.2906,1.1298,0.9100,0.2165,1.1235,1.1676,0.09472,175, 0.3235,1.0885,0.9236,0.1419,1.0550,1.2139,0.1079,150, 0.2989,1.0653,0.8941,0.1774,1.0114,1.2725,0.1309,125, 0.2417,1.0461,0.8189,0.1073,1.1676,1.0102,0.1759,100),ncol=8,byrow=TRUE) ##### This function performs the analysis of Cole et al. hr.compute.lt<-function(lower,upper,threshold.cd4,n.impute,cd4,t.event,event,par.ests,truncate=NA,discrete=FALSE,cd4.3=FALSE,age.haart=NA,age.cd4=NA) { select1<-ifelse(cd4>lower & cd4<=upper,1,0) select<-ifelse(is.na(select1),0,select1) if (cd4.3){ select<-ifelse(is.na(age.cd4),select, ifelse(age.haart-age.cd4>0.25,0,select)) } t<-t.event[select==1] event.orig<-event[select==1] defer.orig<-ifelse(cd4[select==1]<=threshold.cd4,1,0) row<-which(par.ests[,8]==threshold.cd4) pi.est<-par.ests[row,7] n<-sum(defer.orig) beta<-se<-NULL for (i in 1:n.impute) { events<-event.orig defer<-defer.orig t.f<-ifelse(defer==0,0,qggamma(par.ests[row,1],par.ests[row,2],par.ests[row,3],runif(n,0,1))) t.star<-t+t.f # u<-rbinom(1,n,pi.est/(1-pi.est)) u<-rnbinom(1,n,1-pi.est) if (u>0) { t.g<-qggamma(par.ests[row,4],par.ests[row,5],par.ests[row,6],runif(u,0,1)) t.star<-c(t.star,t.g) defer<-c(defer,rep(1,u)) events<-c(events,rep(1,u)) } if (!is.na(truncate)) { t.star<-ifelse(t.star>truncate,truncate,t.star) events<-ifelse(t.star>truncate,0,events) } S<-Surv(t.star,events) mod<-coxph(S~defer) beta[i]<-summary(mod)$coeff[1] se[i]<-summary(mod)$coeff[3] if (discrete) { t.star0<-t.star[t.star>0] defer0<-defer[t.star>0] events0<-events[t.star>0] t.star.months<-ceiling(t.star0*12) t.months<-1:t.star.months[1] defer.m<-rep(defer0[1],t.star.months[1]) events.m<-rep(0,t.star.months[1]) events.m[t.star.months[1]]<-events0[1] for (j in 2:length(t.star0)) { t.months<-c(1:t.star.months[j],t.months) defer.m<-c(rep(defer0[j],t.star.months[j]),defer.m) stuff<-rep(0,t.star.months[j]) stuff[t.star.months[j]]<-events0[j] events.m<-c(stuff,events.m) } mod<-glm(events.m~rcs(t.months,4) + defer.m, family="binomial") beta[i]<-summary(mod)$coeff["defer.m",1] se[i]<-summary(mod)$coeff["defer.m",2] } } hr<-exp(mean(beta)) var<-mean(se^2)+var(beta)*(1+1/n.impute) ci<-exp(mean(beta)+1.96*sqrt(var)*c(-1,1)) label<-paste(paste(lower,threshold.cd4-1,sep="-"),paste(threshold.cd4,upper-1,sep="-"),sep=" vs. ") return(c(label,hr,ci)) } ######### FUNCTIONS NECESSARY FOR DOING THE HERNAN APPROACH ###################### #------------------------# # Functions # #------------------------# #---------------------------------------------------------------------# # add.fun # #---------------------------------------------------------------------# # In creating monthly (time-dependent) data frame, records are inserted # that do not exist in the original data. This creates missing values for # CD4, viral loads, CD4 %, etc. Add.fun fills in these values with the # last non-missing observation and records the age at which that non-missing # value occurred. add.fun <- function(labage.1, labage.2){ var <- names(labage.1)[3] labage.1$order <- 1 labage.2$order <- 0 pullData <- rbind(labage.1,labage.2) pullData$age <- round(pullData$age, 4) pullData <- pullData[order(pullData$pid, pullData$age, pullData$order),] hasVal <- diff(c(0L,as.integer(as.factor(pullData$pid)))) != 0 | !is.na(pullData[,var]) | (pullData$order == 0) index <- rep(which(hasVal), times=table(cumsum(hasVal))) pullData[,c(var, paste("age", var, sep='.'))] <- pullData[index, c(var, "age")] return(pullData) } ####### Analysis function that does pretty much everything hernan.analysis<-function(d2, u.limit, l.limit, filename, start.within=180/365.25, LOST=FALSE, COVARIATES=FALSE, SITE.YEAR=FALSE, EXCLUDE.IDU=FALSE, DEATH.ONLY=FALSE, CENSOR.TIME=FALSE, EXCLUDE.DEATHS=FALSE, ADD2=FALSE, EXCLUDE.NON.HAART.ART=TRUE, TRUNC.LEVEL=0.99) { if (EXCLUDE.NON.HAART.ART) { ### Currently, this is the only option. At some point, I should censor but I'm not sure VU gave me this data. d2<-d2[d2$non.haart.art==0,] d2$non.haart.art<-NULL } if (ADD2==TRUE){ d2$last.age<-with(d2, ifelse(deceased==0, last.age+60/365.25, last.age)) } # Identifying age at which CD4 is less than ulimit by PID less.ulimit <- lapply(split(subset(d2, select=c(pid,age,cd4)), d2$pid), FUN=function(y){ y[min(which(y[,"cd4"] < u.limit),na.rm=TRUE),"age"]}) less.ulimit.df <- data.frame("pid"=names(less.ulimit), "age.less.ulimit"=unlist(less.ulimit)) less.ulimit.df$pid <- as.character(less.ulimit.df$pid) d2[,"age.less.ulimit"] <- less.ulimit.df[match(d2$pid,less.ulimit.df$pid,nomatch=NA), "age.less.ulimit"] d2 <- d2[order(d2$pid, d2$age),] # If a PID has a missing age.less.ulimit, then they did not have a CD4 below the upper # limit and should be deleted from the analysis d2 <- subset(d2, !is.na(age.less.ulimit)) # Time.in.care is the time between the first recorded clinic visit and the age a PIDs CD4 fell # below the upper limit d2$time.in.care <- d2$age.less.ulimit - d2$first.age # If someone died, their true last age should be the age of death # Otherwise it is their last recorded age. d2$true.last.age<-with(d2,ifelse(is.na(age_at_death),last.age,age_at_death)) # Total follow-up time is the time from when a PIDs CD4 fell below the upper limit to their true.last.age d2$tot.fu <- d2$true.last.age - d2$age.less.ulimit d2<-d2[d2$tot.fu>=0,] #Doesn't appear to be needed (2014-03-07) # In the original data, each record at which an OI occurred had a non-missing age.oi. All other records # had a missing age.oi. For analysis purposes, those OIs that occurred between a PIDs first age and # age.less.ulimit were recorded as prior.oi and the age recorded. For those OIs that occurred after # age.less.ulimit, only the first occurrence of an OI will be included in the analysis and the variable # age.oi will repeat the age at which the first OI after age.less.ulimit occurred. The following # code addresses both of these issues. # Defining prior.oi (OI between first.age and age.less.ulimit) age.oi <- subset(d2, !is.na(age.oi), select=c(pid, age.oi, age.less.ulimit)) age.oi$prior.oi <- ifelse(round(age.oi$age.oi,4) <= round(age.oi$age.less.ulimit,4),1,0) ##### Added by Bryan to try to simplify prior.oi.df<-data.frame(pid=unique(age.oi$pid[age.oi$prior.oi==1]),prior.oi=1) d2<-merge(d2,prior.oi.df,by="pid",all.x=TRUE) d2 <- d2[order(d2$pid,d2$age),] d2$prior.oi <- ifelse(is.na(d2$prior.oi),0,d2$prior.oi) # Grabbing first incidence of OI after age.less.ulimit if more than one oi age.oi.af <- lapply(split(age.oi, age.oi$pid), FUN=function(y){ ifelse(all(y[,"prior.oi"]==1),NA,min(y[y[,"prior.oi"]==0,"age.oi"]))}) age.oi.af.df <- data.frame("pid"=unlist(names(age.oi.af)), "age.oi.af"=unlist(age.oi.af)) age.oi.af.df$pid <- as.character(age.oi.af.df$pid) age.oi.af.df<-age.oi.af.df[!is.na(age.oi.af.df$age.oi.af),] d2 <- merge(d2, age.oi.af.df, by="pid", all.x=TRUE) d2 <- d2[order(d2$pid,d2$age),] d2$age.oi<-d2$age.oi.af d2$age.oi.af<-NULL # Subsetting out PIDs with prior.oi d2 <- subset(d2, prior.oi != 1) unique.id<-unique(d2$pid) gt.llimit.df <- subset(d2, round(age,4) == round(age.less.ulimit,4), select=c(pid,age,age.less.ulimit,cd4)) gt.llimit.df$gt.llimit <- ifelse(gt.llimit.df$cd4 >= l.limit,1,0) d2[,"gt.llimit"] <- gt.llimit.df[match(d2$pid, gt.llimit.df$pid,nomatch=NA), "gt.llimit"] sub.d2 <- subset(d2, (age.fhaart >= age.less.ulimit | is.na(age.fhaart)) & gt.llimit == 1) sub.d2 <- sub.d2[order(sub.d2$pid,sub.d2$age),] sub.d2$gt.llimit<-NULL sub.unique.id <- unique(sub.d2$pid) d2 <- sub.d2 ####### I'm being bold, and changing the structure of the code here. I'm going to expand to months first and then I'll define ####### treatment groups. #### I'm going to keep all of the repeating covariates to avoid changing them later, but I'll grab baseline values tmp<-d2[!duplicated(d2$pid),] tmp$age.rx.start<-tmp$haart_before_first_visit<-NULL tmp$age<-tmp$oi<-tmp$last.age<-tmp$prior.oi<-tmp$haart<-tmp$fhaart<-tmp$age.approx<-NULL max.follow <- 12*max(tmp$tot.fu, na.rm=TRUE) months <- seq(0, ceiling(max.follow),by=1) tmp$month <- 0 tmp2<-data.frame(pid=rep(tmp$pid,each=length(months[-1])), site=rep(tmp$site,each=length(months[-1])), black=rep(tmp$black,each=length(months[-1])), white=rep(tmp$white,each=length(months[-1])), deceased=rep(tmp$deceased,each=length(months[-1])), age_at_death=rep(tmp$age_at_death,each=length(months[-1])), idu=rep(tmp$idu,each=length(months[-1])), age.oi=rep(tmp$age.oi,each=length(months[-1])), cd4=NA, male=rep(tmp$male,each=length(months[-1])), first.age=rep(tmp$first.age,each=length(months[-1])), first.year=rep(tmp$first.year,each=length(months[-1])), ltfu=rep(tmp$ltfu,each=length(months[-1])), age.fhaart=rep(tmp$age.fhaart,each=length(months[-1])), age.less.ulimit=rep(tmp$age.less.ulimit,each=length(months[-1])), time.in.care=rep(tmp$time.in.care,each=length(months[-1])), true.last.age=rep(tmp$true.last.age,each=length(months[-1])), tot.fu=rep(tmp$tot.fu,each=length(months[-1])), month=rep(months[-1],times=length(tmp$pid))) surv.df<-rbind(tmp,tmp2) surv.df <- surv.df[order(surv.df$pid, surv.df$month),] surv.df$age <- round(surv.df$age.less.ulimit + surv.df$month/12,4) ###### Defining month of first HAART using the larger dataframe locate.fhaart <- lapply(split(subset(surv.df, select=c(pid,age,age.fhaart)),surv.df$pid), FUN=function(y){ y[min(which(y[,"age"] >= y[,"age.fhaart"])),"age"] }) locate.fhaart.df <- data.frame("pid"=names(locate.fhaart)[!is.na(locate.fhaart)], "new.age.fhaart"=unlist(locate.fhaart[!is.na(locate.fhaart)])) surv.df[,"new.age.fhaart"] <- locate.fhaart.df[match(surv.df$pid,locate.fhaart.df$pid,nomatch=NA), "new.age.fhaart"] surv.df$fhaart <- ifelse(round(surv.df$age,4) == round(surv.df$new.age.fhaart,4) & !is.na(surv.df$new.age.fhaart),1, ifelse(is.na(surv.df$new.age.fhaart) | round(surv.df$age,4) != round(surv.df$new.age.fhaart,4),0,NA)) locate.oi <- lapply(split(subset(surv.df, select=c(pid,age,age.oi)),surv.df$pid), FUN=function(y){ y[min(which(y[,"age"] >= y[,"age.oi"])),"age"] }) locate.oi.df <- data.frame("pid"=names(locate.oi)[!is.na(locate.oi)], "new.age.oi"=unlist(locate.oi[!is.na(locate.oi)])) surv.df[,"new.age.oi"] <- locate.oi.df[match(surv.df$pid,locate.oi.df$pid,nomatch=NA), "new.age.oi"] surv.df$oi <- ifelse(round(surv.df$age,4) == round(surv.df$new.age.oi,4) & !is.na(surv.df$new.age.oi),1, ifelse(is.na(surv.df$new.age.oi) | round(surv.df$age,4) != round(surv.df$new.age.oi,4),0,NA)) ###### Getting rid of time after follow-up. I did this here, because some of the above code needed it. # # #surv.df <- subset(surv.df, round(age,4) <=round(true.last.age,4)) ### Commented out on 2014-03-07 # # # # # # surv.df$death <- ifelse(is.na(surv.df$age_at_death),0, # # ifelse((round(surv.df$age,4) == # # round(surv.df$true.last.age,4)) | # # (surv.df$age + 1/12 > surv.df$true.last.age),1,0)) # # # # # # surv.df$oi <- ifelse(is.na(surv.df$age.oi),0, # # ifelse((surv.df$age + 1/12) > surv.df$true.last.age & # # (surv.df$age.oi + 1/12) > surv.df$true.last.age,1, # surv.df$oi)) surv.df <- subset(surv.df, round(age,4) < round(true.last.age,4) + 1/12) ### Changed on 2014-03-07 surv.df$death <- ifelse(is.na(surv.df$age_at_death),0, ifelse((round(surv.df$age,4) == round(surv.df$true.last.age,4)) | (surv.df$age > surv.df$true.last.age),1,0)) surv.df$oi <- ifelse(is.na(surv.df$age.oi),0, ifelse((surv.df$age ) > surv.df$true.last.age & (surv.df$age.oi) > surv.df$true.last.age,1, surv.df$oi)) surv.df$month<-ifelse(surv.df$month==0&surv.df$death==1,1,surv.df$month) if (DEATH.ONLY) { surv.df$event <- ifelse(surv.df$death == 0, 0,1) } if (!DEATH.ONLY) { surv.df$event <- ifelse(surv.df$death == 0 & surv.df$oi == 0,0,1) } ###### Adding CD4 counts ref.cd4 <- data.frame("pid"=d2$pid[!duplicated(d2$pid)], # "ref.cd4"=d2$cd4[round(d2$age,4)==round(d2$age.less.ulimit,4)], #### Bryan changed because I'll use CD4 at first visit as covariate (This was probably a poor choice, I fix back later - 2014-03-10) "ref.cd4"=d2$cd4[!duplicated(d2$pid)], "age.ref.cd4"=d2$age.less.ulimit[!duplicated(d2$pid)], "age.fhaart"=d2$age.fhaart[!duplicated(d2$pid)]) surv.df <- merge(surv.df, ref.cd4, all=TRUE) surv.df <- surv.df[order(surv.df$pid,surv.df$age),] labage.1 <- surv.df[,c("pid","age","cd4")] labage.1$cd4<-NA labage.2 <- subset(d2, pid %in% surv.df$pid, select=c(pid,age,cd4)) names(labage.2) <- c("pid","age","cd4") labage.2 <- subset(labage.2, !is.na(cd4)) pullData <- add.fun(labage.1, labage.2) surv.df$cd4 <- pullData[pullData$order==1,"cd4"] surv.df$age <- round(surv.df$age, 4) surv.df$age.fhaart <- round(surv.df$age.fhaart,4) surv.df$age.cd4 <- pullData[pullData$order==1, "age.cd4"] surv.df$time.to.cd4 <- 12*(surv.df$age - surv.df$age.cd4) junk<-subset(surv.df[!duplicated(surv.df$pid),],select=c(pid,cd4)) ###### The wrong ref.cd4 was saved above. I'm fixing it here to be the CD4 when first going below upper limit. 2014-03-10 surv.df<-merge(surv.df,junk,all.x=TRUE) surv.df<-surv.df[order(surv.df$pid,surv.df$age),] ## Need to ensure that for cd4 that age.cd4 ## is less than age.fhaart when fhaart==1 # Adjusting those who started HAART and had a CD4 count in that month after # the age.fhaart which.cd4 <- which(round(surv.df$age.fhaart,4) < round(surv.df$age.cd4,4) & surv.df$fhaart == 1 & round(surv.df$age.cd4,4) > round(surv.df$age.less.ulimit,4)) surv.df$cd4[which.cd4] <- surv.df$cd4[(which.cd4-1)] surv.df$age.cd4[which.cd4] <- surv.df$age.cd4[(which.cd4-1)] surv.df$time.to.cd4[which.cd4] <- 12*(surv.df$age[which.cd4]-surv.df$age.cd4[which.cd4]) ### Line added 12/12/14 (should have minor impact) #### do the same thing for oi: adjusting those who had oi and CD4 count in the same month which.cd4.oi <- which(round(surv.df$age.oi,4) < round(surv.df$age.cd4,4) & surv.df$oi== 1 & round(surv.df$age.oi,4) > round(surv.df$age.less.ulimit,4)) surv.df$cd4[which.cd4.oi] <- surv.df$cd4[(which.cd4.oi-1)] surv.df$age.cd4[which.cd4.oi] <- surv.df$age.cd4[(which.cd4.oi-1)] surv.df$time.to.cd4[which.cd4.oi] <- 12*(surv.df$age[which.cd4.oi]-surv.df$age.cd4[which.cd4.oi]) ### Line added 12/12/14 (should have minor impact) if (!DEATH.ONLY){ surv.df<-surv.df[is.na(surv.df$new.age.oi) | surv.df$age<=surv.df$new.age.oi,] } surv.df$haart<-with(surv.df, ifelse(is.na(new.age.fhaart),0, ifelse(age=1) { # locate.cd4.threshold[i]<-which(surv.df$age.cd4==min(surv.df$age.cd4[surv.df$cd4start.within) | (haart==1 & age.fhaart-age.less.ulimitage.cd4.llimit+start.within & (age.fhaart>age.cd4.llimit+start.within | is.na(age.fhaart)) & deferred1==1,1,0)) surv.df$cens.start.early<-with(surv.df, ifelse(haart==1 & (age.fhaart=age.cd4.llimit & age=age.cd4.llimit & agew.trunc,w.trunc,d.analysis$weight) with(d.analysis, table(deferred, event)) with(d.analysis, mean(event[deferred==0])) with(d.analysis, mean(event[deferred==1])) chisq.test(with(d.analysis, table(deferred, event))) if (!COVARIATES & !SITE.YEAR){ ### Simple model mod<-glm(event~rcs(month,4) + deferred , weights=weight.trunc, data=d.analysis, family=binomial) } if (COVARIATES & !SITE.YEAR){ ### Model with covariates mod<-glm(event~rcs(month,4) + deferred + ref.cd4 + age.less.ulimit + male, weights=weight.trunc, family="binomial", data=d.analysis) } if (!COVARIATES & SITE.YEAR){ ### Model stratifying by site and year mod<-glm(event~rcs(month,4) + site + year.less.ulimit + deferred, weights=weight.trunc, family="binomial",data=d.analysis) } if (COVARIATES & SITE.YEAR){ ### Model with covariates and stratifying by site and year mod<-glm(event~rcs(month,4) + site + year.less.ulimit + deferred + ref.cd4 + age.less.ulimit + male, #### Model may be too rich weights=weight.trunc, family="binomial", data=d.analysis) } #mod<-glm(event~rcs(month,4)+site+year.less.ulimit + deferred + ref.cd4 + age.less.ulimit + male, # weights=weight.trunc, family="binomial", data=d.analysis) library(sandwich) se<-sqrt(sandwich(mod)["deferred","deferred"]) hr<-exp(summary(mod)$coeff["deferred","Estimate"]) lower<-exp(mod$coeff[names(mod$coeff)=="deferred"]-1.96*se) upper<-exp(mod$coeff[names(mod$coeff)=="deferred"]+1.96*se) ests<-c(hr,lower,upper) # save(ests,u.limit,l.limit,LOST,COVARIATES,SITE.YEAR,EXCLUDE.IDU,DEATH.ONLY,CENSOR.TIME,EXCLUDE.DEATHS,start.within,file=filename) ests } ######################## Here I do the analyses library(rms) load("cnics-hernan-stuff5c.Rda") #### Dataset for Hernan analyses load("cnics-cole-stuff5.Rda") #### Dataset for Cole analyses ##### Listing all of the ids all.ids<-unique(d2$pid) all.ids<-all.ids[order(all.ids)] Nboot<-200 for (ii in 195:199) { set.seed(ii) ##### Sampling ids. First sample numbers and then find corresponding id. boot.samp<-sample(1:length(all.ids), length(all.ids), replace=TRUE) ids.boot<-all.ids[boot.samp] ##### Creating new bootstrap dataframe d.boot for Cole approach. (Fast) d.junk<-data.frame(id=all.ids) d.plus<-merge(d,d.junk,by="id",all.y=TRUE) d.plus<-d.plus[order(d.plus$id),] d.boot1<-d.plus[boot.samp,] d.boot<-d.boot1[!is.na(d.boot1$male),] ##### Creating new bootstrap dataframe d2.boot for Hernan approach. (Slow - 2:12) index.grouped <- split(seq_len(nrow(d2)), d2$pid) index <- unlist(index.grouped[ids.boot]) d2.boot <- d2[index, ] d2.boot$count <- 1 while(sum(duplicated(d2.boot))){ d2.boot$count<-ifelse(duplicated(d2.boot),d2.boot$count+1,d2.boot$count) } d2.boot$pid<-paste(d2.boot$pid,d2.boot$count,sep="-") d2.boot$count<-NULL # df.bs<-sapply(ids.boot,function(x)which(d2$pid==x)) # d2.boot<-d2[unlist(df.bs),] # d2.boot$count<-ifelse(duplicated(d2.boot),2,1) # d2.boot$count<-ifelse(duplicated(d2.boot),3,d2.boot$count) # d2.boot$count<-ifelse(duplicated(d2.boot),4,d2.boot$count) # d2.boot$count<-ifelse(duplicated(d2.boot),5,d2.boot$count) # d2.boot$count<-ifelse(duplicated(d2.boot),6,d2.boot$count) # d2.boot$count<-ifelse(duplicated(d2.boot),7,d2.boot$count) # d2.boot$count<-ifelse(duplicated(d2.boot),8,d2.boot$count) # d2.boot$count<-ifelse(duplicated(d2.boot),9,d2.boot$count) # d2.boot$count<-ifelse(duplicated(d2.boot),10,d2.boot$count) # d2.boot$pid<-paste(d2.boot$pid,d2.boot$count,sep="-") # d2.boot$count<-NULL # # d2.boot1<-d2.boot # saving table for checking # # library(data.table) # d2<-data.table(d2) # stuff<-date() # # # d2.boot<-d2[d2$pid==ids.boot[1],] # for (i in 2:length(ids.boot)){ # d2.boot<-rbind(d2.boot,d2[d2$pid==ids.boot[i],]) # } # d2.boot$count<-ifelse(duplicated(d2.boot),2,1) # d2.boot$count<-ifelse(duplicated(d2.boot),3,d2.boot$count) # d2.boot$count<-ifelse(duplicated(d2.boot),4,d2.boot$count) # d2.boot$count<-ifelse(duplicated(d2.boot),5,d2.boot$count) # d2.boot$count<-ifelse(duplicated(d2.boot),6,d2.boot$count) # d2.boot$count<-ifelse(duplicated(d2.boot),7,d2.boot$count) # d2.boot$count<-ifelse(duplicated(d2.boot),8,d2.boot$count) # d2.boot$count<-ifelse(duplicated(d2.boot),9,d2.boot$count) # d2.boot$count<-ifelse(duplicated(d2.boot),10,d2.boot$count) # d2.boot$pid<-paste(d2.boot$pid,d2.boot$count,sep="-") # #d2.boot$count<-NULL # stuff # date() #### Basic Analyses hernan.100b<-hernan.analysis(d2.boot, u.limit=201, l.limit=101, filename="basic-200vs100.Rda") hernan.200b<-hernan.analysis(d2.boot, u.limit=301, l.limit=201, filename="basic-300vs200.Rda") hernan.300b<-hernan.analysis(d2.boot, u.limit=401, l.limit=301, filename="basic-400vs300.Rda") hernan.400b<-hernan.analysis(d2.boot, u.limit=501, l.limit=401, filename="basic-500vs400.Rda") #### Basic Analyses using death as outcome hernan.d.100b<-hernan.analysis(d2.boot, u.limit=201, l.limit=101, DEATH.ONLY=TRUE, filename="death-200vs100.Rda") hernan.d.200b<-hernan.analysis(d2.boot, u.limit=301, l.limit=201, DEATH.ONLY=TRUE, filename="death-300vs200.Rda") hernan.d.300b<-hernan.analysis(d2.boot, u.limit=401, l.limit=301, DEATH.ONLY=TRUE, filename="death-400vs300.Rda") hernan.d.400b<-hernan.analysis(d2.boot, u.limit=501, l.limit=401, DEATH.ONLY=TRUE, filename="death-500vs400.Rda") ###### Original had 20 imputations. Because of computation, I am only going to do 5. sterne.d.100b<-with(d.boot,hr.compute.lt(lower= -1,upper=200,threshold.cd4=100,n.impute=5,cd4=cd4,t.event=t.death,event=death,par.ests=par.ests.d,discrete=TRUE)) sterne.d.200b<-with(d.boot,hr.compute.lt(lower=100,upper=300,threshold.cd4=200,n.impute=5,cd4=cd4,t.event=t.death,event=death,par.ests=par.ests.d,discrete=TRUE)) sterne.d.300b<-with(d.boot,hr.compute.lt(lower=200,upper=400,threshold.cd4=300,n.impute=5,cd4=cd4,t.event=t.death,event=death,par.ests=par.ests.d,discrete=TRUE)) sterne.d.400b<-with(d.boot,hr.compute.lt(lower=300,upper=500,threshold.cd4=400,n.impute=5,cd4=cd4,t.event=t.death,event=death,par.ests=par.ests.d,discrete=TRUE)) sterne.100b<-with(d.boot,hr.compute.lt(lower= -1,upper=200,threshold.cd4=100,n.impute=5,cd4=cd4,t.event=t.event,event=event,par.ests=par.ests.d,discrete=TRUE)) sterne.200b<-with(d.boot,hr.compute.lt(lower=100,upper=300,threshold.cd4=200,n.impute=5,cd4=cd4,t.event=t.event,event=event,par.ests=par.ests.d,discrete=TRUE)) sterne.300b<-with(d.boot,hr.compute.lt(lower=200,upper=400,threshold.cd4=300,n.impute=5,cd4=cd4,t.event=t.event,event=event,par.ests=par.ests.d,discrete=TRUE)) sterne.400b<-with(d.boot,hr.compute.lt(lower=300,upper=500,threshold.cd4=400,n.impute=5,cd4=cd4,t.event=t.event,event=event,par.ests=par.ests.d,discrete=TRUE)) BootAns<-rbind(c(0,1,100,as.numeric(sterne.d.100b)[2:4]), c(0,1,200,as.numeric(sterne.d.200b)[2:4]), c(0,1,300,as.numeric(sterne.d.300b)[2:4]), c(0,1,400,as.numeric(sterne.d.400b)[2:4]), c(0,0,100,as.numeric(sterne.100b)[2:4]), c(0,0,200,as.numeric(sterne.200b)[2:4]), c(0,0,300,as.numeric(sterne.300b)[2:4]), c(0,0,400,as.numeric(sterne.400b)[2:4]), c(1,1,100,hernan.d.100b), c(1,1,200,hernan.d.200b), c(1,1,300,hernan.d.300b), c(1,1,400,hernan.d.400b), c(1,0,100,hernan.100b), c(1,0,200,hernan.200b), c(1,0,300,hernan.300b), c(1,0,400,hernan.400b)) colnames(BootAns)<-c("hernan","death","threshold","hr","lower","upper") filename<-paste("bootstrap-results/BootAns",ii,".Rda",sep="") save(BootAns,file=filename) } load("bootstrap-results/BootAns1.Rda") h0d1.100<-BootAns[1,4] h0d1.200<-BootAns[2,4] h0d1.300<-BootAns[3,4] h0d1.400<-BootAns[4,4] h0d0.100<-BootAns[5,4] h0d0.200<-BootAns[6,4] h0d0.300<-BootAns[7,4] h0d0.400<-BootAns[8,4] h1d1.100<-BootAns[9,4] h1d1.200<-BootAns[10,4] h1d1.300<-BootAns[11,4] h1d1.400<-BootAns[12,4] h1d0.100<-BootAns[13,4] h1d0.200<-BootAns[14,4] h1d0.300<-BootAns[15,4] h1d0.400<-BootAns[16,4] Nboot<-200 for (ii in 2:Nboot) { input.file<-paste("bootstrap-results/BootAns",ii,".Rda",sep="") load(input.file) h0d1.100<-c(h0d1.100,BootAns[1,4]) h0d1.200<-c(h0d1.200,BootAns[2,4]) h0d1.300<-c(h0d1.300,BootAns[3,4]) h0d1.400<-c(h0d1.400,BootAns[4,4]) h0d0.100<-c(h0d0.100,BootAns[5,4]) h0d0.200<-c(h0d0.200,BootAns[6,4]) h0d0.300<-c(h0d0.300,BootAns[7,4]) h0d0.400<-c(h0d0.400,BootAns[8,4]) h1d1.100<-c(h1d1.100,BootAns[9,4]) h1d1.200<-c(h1d1.200,BootAns[10,4]) h1d1.300<-c(h1d1.300,BootAns[11,4]) h1d1.400<-c(h1d1.400,BootAns[12,4]) h1d0.100<-c(h1d0.100,BootAns[13,4]) h1d0.200<-c(h1d0.200,BootAns[14,4]) h1d0.300<-c(h1d0.300,BootAns[15,4]) h1d0.400<-c(h1d0.400,BootAns[16,4]) } hist(log(h1d0.100)-log(h0d0.100)) hist(log(h1d0.200)-log(h0d0.200)) hist(log(h1d0.300)-log(h0d0.300)) hist(log(h1d0.400)-log(h0d0.400)) hist(log(h1d1.100)-log(h0d1.100)) hist(log(h1d1.200)-log(h0d1.200)) hist(log(h1d1.300)-log(h0d1.300)) hist(log(h1d1.400)-log(h0d1.400)) quantile(log(h1d0.100)-log(h0d0.100),c(.025,.975)) quantile(log(h1d0.200)-log(h0d0.200),c(.025,.975)) quantile(log(h1d0.300)-log(h0d0.300),c(.025,.975)) quantile(log(h1d0.400)-log(h0d0.400),c(.025,.975)) quantile(log(h1d1.100)-log(h0d1.100),c(.025,.975)) quantile(log(h1d1.200)-log(h0d1.200),c(.025,.975)) quantile(log(h1d1.300)-log(h0d1.300),c(.025,.975)) quantile(log(h1d1.400)-log(h0d1.400),c(.025,.975)) exp(quantile(log(h1d0.100)-log(h0d0.100),c(.025,.975))) exp(quantile(log(h1d0.200)-log(h0d0.200),c(.025,.975))) exp(quantile(log(h1d0.300)-log(h0d0.300),c(.025,.975))) exp(quantile(log(h1d0.400)-log(h0d0.400),c(.025,.975))) exp(quantile(log(h1d1.100)-log(h0d1.100),c(.025,.975))) exp(quantile(log(h1d1.200)-log(h0d1.200),c(.025,.975))) exp(quantile(log(h1d1.300)-log(h0d1.300),c(.025,.975))) exp(quantile(log(h1d1.400)-log(h0d1.400),c(.025,.975))) #### Analyses with original (non-bootstrap) data load("basic-200vs100.Rda") h1d0.100.orig<-ests[1] load("basic-300vs200.Rda") h1d0.200.orig<-ests[1] load("basic-400vs300.Rda") h1d0.300.orig<-ests[1] load("basic-500vs400.Rda") h1d0.400.orig<-ests[1] load("death-200vs100.Rda") h1d1.100.orig<-ests[1] load("death-300vs200.Rda") h1d1.200.orig<-ests[1] load("death-400vs300.Rda") h1d1.300.orig<-ests[1] load("death-500vs400.Rda") h1d1.400.orig<-ests[1] load("basic-sterne-results.Rda") h0d0.100.orig<-as.numeric(sterne.b[1,2]) h0d0.200.orig<-as.numeric(sterne.b[2,2]) h0d0.300.orig<-as.numeric(sterne.b[3,2]) h0d0.400.orig<-as.numeric(sterne.b[4,2]) h0d1.100.orig<-as.numeric(sterne.d.b[1,2]) h0d1.200.orig<-as.numeric(sterne.d.b[2,2]) h0d1.300.orig<-as.numeric(sterne.d.b[3,2]) h0d1.400.orig<-as.numeric(sterne.d.b[4,2]) sd0.100<-sd(log(h1d0.100)-log(h0d0.100)) sd0.200<-sd(log(h1d0.200)-log(h0d0.200)) sd0.300<-sd(log(h1d0.300)-log(h0d0.300)) sd0.400<-sd(log(h1d0.400)-log(h0d0.400)) sd1.100<-sd(log(h1d1.100)-log(h0d1.100)) sd1.200<-sd(log(h1d1.200)-log(h0d1.200)) sd1.300<-sd(log(h1d1.300)-log(h0d1.300)) sd1.400<-sd(log(h1d1.400)-log(h0d1.400)) plot(c(log(1/3),log(3)),c(0,1),type="n",xlab="Ratio of Hazard Ratios",ylab="",axes=FALSE) axis(1,at=log(c(1/3,1/2,1,2,3)),c(0.3,0.5,1,2,3)) mtext("Death/ADE") lines(quantile(log(h1d0.100)-log(h0d0.100),c(.025,.975)),rep(.8,2)) lines(quantile(log(h1d0.200)-log(h0d0.200),c(.025,.975)),rep(.6,2)) lines(quantile(log(h1d0.300)-log(h0d0.300),c(.025,.975)),rep(.4,2)) lines(quantile(log(h1d0.400)-log(h0d0.400),c(.025,.975)),rep(.2,2)) points(log(h1d0.100.orig)-log(h0d0.100.orig),.8,pch=3) points(log(h1d0.200.orig)-log(h0d0.200.orig),.6,pch=3) points(log(h1d0.300.orig)-log(h0d0.300.orig),.4,pch=3) points(log(h1d0.400.orig)-log(h0d0.400.orig),.2,pch=3) lines(log(h1d0.100.orig)-log(h0d0.100.orig)+c(-1,1)*1.96*sd0.100,rep(.75,2),col=2) lines(log(h1d0.200.orig)-log(h0d0.200.orig)+c(-1,1)*1.96*sd0.200,rep(.55,2),col=2) lines(log(h1d0.300.orig)-log(h0d0.300.orig)+c(-1,1)*1.96*sd0.300,rep(.35,2),col=2) lines(log(h1d0.400.orig)-log(h0d0.400.orig)+c(-1,1)*1.96*sd0.400,rep(.15,2),col=2) points(log(h1d0.100.orig)-log(h0d0.100.orig),.75,pch=3,col=2) points(log(h1d0.200.orig)-log(h0d0.200.orig),.55,pch=3,col=2) points(log(h1d0.300.orig)-log(h0d0.300.orig),.35,pch=3,col=2) points(log(h1d0.400.orig)-log(h0d0.400.orig),.15,pch=3,col=2) abline(v=0,lty=2) plot(c(log(1/3),log(3)),c(0,1),type="n",xlab="Ratio of Hazard Ratios",ylab="",axes=FALSE) axis(1,at=log(c(1/3,1/2,1,2,3)),c(0.3,0.5,1,2,3)) mtext("Death") lines(quantile(log(h1d1.100)-log(h0d1.100),c(.025,.975)),rep(.8,2)) lines(quantile(log(h1d1.200)-log(h0d1.200),c(.025,.975)),rep(.6,2)) lines(quantile(log(h1d1.300)-log(h0d1.300),c(.025,.975)),rep(.4,2)) lines(quantile(log(h1d1.400)-log(h0d1.400),c(.025,.975)),rep(.2,2)) points(log(h1d1.100.orig)-log(h0d1.100.orig),.8,pch=3) points(log(h1d1.200.orig)-log(h0d1.200.orig),.6,pch=3) points(log(h1d1.300.orig)-log(h0d1.300.orig),.4,pch=3) points(log(h1d1.400.orig)-log(h0d1.400.orig),.2,pch=3) lines(log(h1d1.100.orig)-log(h0d1.100.orig)+c(-1,1)*1.96*sd1.100,rep(.75,2),col=2) lines(log(h1d1.200.orig)-log(h0d1.200.orig)+c(-1,1)*1.96*sd1.200,rep(.55,2),col=2) lines(log(h1d1.300.orig)-log(h0d1.300.orig)+c(-1,1)*1.96*sd1.300,rep(.35,2),col=2) lines(log(h1d1.400.orig)-log(h0d1.400.orig)+c(-1,1)*1.96*sd1.400,rep(.15,2),col=2) points(log(h1d1.100.orig)-log(h0d1.100.orig),.75,pch=3,col=2) points(log(h1d1.200.orig)-log(h0d1.200.orig),.55,pch=3,col=2) points(log(h1d1.300.orig)-log(h0d1.300.orig),.35,pch=3,col=2) points(log(h1d1.400.orig)-log(h0d1.400.orig),.15,pch=3,col=2) abline(v=0,lty=2) pdf("plot-comparison-bootstrap-death.pdf",height=7,width=6) par(mar=c(5,.1,.5,.1)) plot(c(log(.25),log(3)),c(.1,.9),xlab="Ratio of Hazard Ratios",ylab="",axes=FALSE,type="n") abline(v=0,col=gray(.5)) axis(1,at=log(c(.33,.5,1,2,3)),c(.3,.5,1,2,3)) text(log(.25),.8,"200 vs. 100", pos=4, cex=1) text(log(.25),.6,"300 vs. 200", pos=4, cex=1) text(log(.25),.4,"400 vs. 300", pos=4, cex=1) text(log(.25),.2,"500 vs. 400", pos=4, cex=1) lines(log(h1d1.100.orig)-log(h0d1.100.orig)+c(-1,1)*1.96*sd1.100,rep(.8,2)) lines(log(h1d1.200.orig)-log(h0d1.200.orig)+c(-1,1)*1.96*sd1.200,rep(.6,2)) lines(log(h1d1.300.orig)-log(h0d1.300.orig)+c(-1,1)*1.96*sd1.300,rep(.4,2)) lines(log(h1d1.400.orig)-log(h0d1.400.orig)+c(-1,1)*1.96*sd1.400,rep(.2,2)) points(log(h1d1.100.orig)-log(h0d1.100.orig),.8,pch=3) points(log(h1d1.200.orig)-log(h0d1.200.orig),.6,pch=3) points(log(h1d1.300.orig)-log(h0d1.300.orig),.4,pch=3) points(log(h1d1.400.orig)-log(h0d1.400.orig),.2,pch=3) dev.off() pdf("plot-comparison-bootstrap.pdf",height=7,width=6) par(mar=c(5,.1,.5,.1)) plot(c(log(.25),log(3)),c(.1,.9),xlab="Ratio of Hazard Ratios",ylab="",axes=FALSE,type="n") abline(v=0,col=gray(.5)) axis(1,at=log(c(.33,.5,1,2,3)),c(.3,.5,1,2,3)) text(log(.25),.8,"200 vs. 100", pos=4, cex=1) text(log(.25),.6,"300 vs. 200", pos=4, cex=1) text(log(.25),.4,"400 vs. 300", pos=4, cex=1) text(log(.25),.2,"500 vs. 400", pos=4, cex=1) lines(log(h1d0.100.orig)-log(h0d0.100.orig)+c(-1,1)*1.96*sd0.100,rep(.8,2)) lines(log(h1d0.200.orig)-log(h0d0.200.orig)+c(-1,1)*1.96*sd0.200,rep(.6,2)) lines(log(h1d0.300.orig)-log(h0d0.300.orig)+c(-1,1)*1.96*sd0.300,rep(.4,2)) lines(log(h1d0.400.orig)-log(h0d0.400.orig)+c(-1,1)*1.96*sd0.400,rep(.2,2)) points(log(h1d0.100.orig)-log(h0d0.100.orig),.8,pch=3) points(log(h1d0.200.orig)-log(h0d0.200.orig),.6,pch=3) points(log(h1d0.300.orig)-log(h0d0.300.orig),.4,pch=3) points(log(h1d0.400.orig)-log(h0d0.400.orig),.2,pch=3) dev.off() #### Combined plot. (plot-comparison-death.pdf + plot-comparison.pdf [both from cnics-cole.R] #### + plot-comparison-bootstrap-death and plot-comparison-bootstrap). #### Figure 3 load("basic-sterne-results.Rda") mi.answers<-function(filename){ load(paste(filename,".Rda",sep="")) est<-ests hr<-est[1] lower<-est[2] upper<-est[3] type<-strsplit(filename,"-")[[1]][1] type.long<-ifelse(type=="basic","Basic, ADE/death", ifelse(type=="censor6","Censor after 6 years", ifelse(type=="exdeath","Exclude deaths w/in 2 wks of ART", ifelse(type=="exIDU","Exclude IDU", ifelse(type=="cov","Adjust for sex, baseline age and CD4", ifelse(type=="lost","Account for LTFU with weights", ifelse(type=="strat","Stratify by site and year", ifelse(type=="death","Basic, death", ifelse(type=="add2","Add 2 mos. to those w/out death",type))))))))) stuff<-c(hr,lower,upper,u.limit,l.limit,type,type.long) return(stuff) } ans.d.b<-rbind(mi.answers("death-200vs100"),mi.answers("death-300vs200"),mi.answers("death-400vs300"), mi.answers("death-500vs400")) pdf("figure3-combined.pdf",height=8,width=10) par(mfrow=c(2,2),mar=c(5,.1,3,.1),xpd=NA) # pdf("plot-comparison-death.pdf",height=7,width=6) # par(mar=c(5,.1,.5,.1)) plot(c(log(.25),log(3)),c(.1,1),xlab="",ylab="",axes=FALSE,type="n") lines(c(0,0),c(.05,.95),col=gray(.7)) axis(1,at=log(c(.33,.5,1,2,3)),c(.3,.5,1,2,3)) points(log(as.numeric(sterne.d.b[1,2])),.8,pch=3) lines(log(as.numeric(sterne.d.b[1,3:4])),c(.8,.8)) text(log(.25),.8,"100 vs. 200", pos=4, cex=1) points(log(as.numeric(sterne.d.b[2,2])),.6,pch=3) lines(log(as.numeric(sterne.d.b[2,3:4])),c(.6,.6)) text(log(.25),.6,"200 vs. 300", pos=4, cex=1) points(log(as.numeric(sterne.d.b[3,2])),.4,pch=3) lines(log(as.numeric(sterne.d.b[3,3:4])),c(.4,.4)) text(log(.25),.4,"300 vs. 400", pos=4, cex=1) points(log(as.numeric(sterne.d.b[4,2])),.2,pch=3) lines(log(as.numeric(sterne.d.b[4,3:4])),c(.2,.2)) text(log(.25),.2,"400 vs. 500", pos=4, cex=1) points(log(as.numeric(ans.d.b[1,1])),.75,pch=3,col=4) lines(log(as.numeric(ans.d.b[1,2:3])),c(.75,.75),col=4) points(log(as.numeric(ans.d.b[2,1])),.55,pch=3,col=4) lines(log(as.numeric(ans.d.b[2,2:3])),c(.55,.55),col=4) points(log(as.numeric(ans.d.b[3,1])),.35,pch=3,col=4) lines(log(as.numeric(ans.d.b[3,2:3])),c(.35,.35),col=4) points(log(as.numeric(ans.d.b[4,1])),.15,pch=3,col=4) lines(log(as.numeric(ans.d.b[4,2:3])),c(.15,.15),col=4) text(0,-.1,"Hazard Ratio") text(0,1,"Death", cex=1.3) #dev.off() ans.b<-rbind(mi.answers("basic-200vs100"),mi.answers("basic-300vs200"),mi.answers("basic-400vs300"), mi.answers("basic-500vs400")) #pdf("plot-comparison.pdf",height=7,width=6) #par(mar=c(5,.1,.5,.1)) plot(c(log(.25),log(3)),c(.1,1),xlab="",ylab="",axes=FALSE,type="n") #abline(v=0,col=gray(.7)) lines(c(0,0),c(.05,.95),col=gray(.7)) axis(1,at=log(c(.33,.5,1,2,3)),c(.3,.5,1,2,3)) points(log(as.numeric(sterne.b[1,2])),.8,pch=3) lines(log(as.numeric(sterne.b[1,3:4])),c(.8,.8)) text(log(.25),.8,"100 vs. 200", pos=4, cex=1) points(log(as.numeric(sterne.b[2,2])),.6,pch=3) lines(log(as.numeric(sterne.b[2,3:4])),c(.6,.6)) text(log(.25),.6,"200 vs. 300", pos=4, cex=1) points(log(as.numeric(sterne.b[3,2])),.4,pch=3) lines(log(as.numeric(sterne.b[3,3:4])),c(.4,.4)) text(log(.25),.4,"300 vs. 400", pos=4, cex=1) points(log(as.numeric(sterne.b[4,2])),.2,pch=3) lines(log(as.numeric(sterne.b[4,3:4])),c(.2,.2)) text(log(.25),.2,"400 vs. 500", pos=4, cex=1) points(log(as.numeric(ans.b[1,1])),.75,pch=3,col=4) lines(log(as.numeric(ans.b[1,2:3])),c(.75,.75),col=4) points(log(as.numeric(ans.b[2,1])),.55,pch=3,col=4) lines(log(as.numeric(ans.b[2,2:3])),c(.55,.55),col=4) points(log(as.numeric(ans.b[3,1])),.35,pch=3,col=4) lines(log(as.numeric(ans.b[3,2:3])),c(.35,.35),col=4) points(log(as.numeric(ans.b[4,1])),.15,pch=3,col=4) lines(log(as.numeric(ans.b[4,2:3])),c(.15,.15),col=4) text(log(.25),1.1,"Hazard Ratios",cex=1.5) text(0,-.1,"Hazard Ratio") text(0,1,"AIDS or Death", cex=1.3) #dev.off() plot(c(log(.25),log(3)),c(.1,1),xlab="",ylab="",axes=FALSE,type="n") #abline(v=0,col=gray(.7)) lines(c(0,0),c(.05,.95),col=gray(.7)) axis(1,at=log(c(.33,.5,1,2,3)),c(.3,.5,1,2,3)) text(log(.25),.8,"100 vs. 200", pos=4, cex=1) text(log(.25),.6,"200 vs. 300", pos=4, cex=1) text(log(.25),.4,"300 vs. 400", pos=4, cex=1) text(log(.25),.2,"400 vs. 500", pos=4, cex=1) lines(log(h1d1.100.orig)-log(h0d1.100.orig)+c(-1,1)*1.96*sd1.100,rep(.8,2)) lines(log(h1d1.200.orig)-log(h0d1.200.orig)+c(-1,1)*1.96*sd1.200,rep(.6,2)) lines(log(h1d1.300.orig)-log(h0d1.300.orig)+c(-1,1)*1.96*sd1.300,rep(.4,2)) lines(log(h1d1.400.orig)-log(h0d1.400.orig)+c(-1,1)*1.96*sd1.400,rep(.2,2)) points(log(h1d1.100.orig)-log(h0d1.100.orig),.8,pch=3) points(log(h1d1.200.orig)-log(h0d1.200.orig),.6,pch=3) points(log(h1d1.300.orig)-log(h0d1.300.orig),.4,pch=3) points(log(h1d1.400.orig)-log(h0d1.400.orig),.2,pch=3) text(0,1,"Death", cex=1.3) text(0,-.1,"Ratio of Hazard Ratios") plot(c(log(.25),log(3)),c(.1,1),xlab="",ylab="",axes=FALSE,type="n") #abline(v=0,col=gray(.7)) lines(c(0,0),c(.05,.95),col=gray(.7)) axis(1,at=log(c(.33,.5,1,2,3)),c(.3,.5,1,2,3)) text(log(.25),.8,"100 vs. 200", pos=4, cex=1) text(log(.25),.6,"200 vs. 300", pos=4, cex=1) text(log(.25),.4,"300 vs. 400", pos=4, cex=1) text(log(.25),.2,"400 vs. 500", pos=4, cex=1) lines(log(h1d0.100.orig)-log(h0d0.100.orig)+c(-1,1)*1.96*sd0.100,rep(.8,2)) lines(log(h1d0.200.orig)-log(h0d0.200.orig)+c(-1,1)*1.96*sd0.200,rep(.6,2)) lines(log(h1d0.300.orig)-log(h0d0.300.orig)+c(-1,1)*1.96*sd0.300,rep(.4,2)) lines(log(h1d0.400.orig)-log(h0d0.400.orig)+c(-1,1)*1.96*sd0.400,rep(.2,2)) points(log(h1d0.100.orig)-log(h0d0.100.orig),.8,pch=3) points(log(h1d0.200.orig)-log(h0d0.200.orig),.6,pch=3) points(log(h1d0.300.orig)-log(h0d0.300.orig),.4,pch=3) points(log(h1d0.400.orig)-log(h0d0.400.orig),.2,pch=3) text(0,1,"AIDS or Death", cex=1.3) text(0,-.1,"Ratio of Hazard Ratios") text(log(.25),1.1,"Ratios of Hazard Ratios",cex=1.5) dev.off() #### Playing with bootstrap approaches library(data.table) # stuff<-date() # d2.junk<-data.table(pid=rep(1:10000,each=20),x=rnorm(200000)) # #d2.junk<-data.frame(pid=rep(1:50,each=10),x=rnorm(500)) # # all.ids.junk<-unique(d2.junk$pid) # ids.boot<-sample(all.ids.junk,length(all.ids.junk),replace=TRUE) # # d2.boot<-d2.junk[d2.junk$pid==ids.boot[1],] # for (i in 2:length(ids.boot)){ # d2.boot<-rbind(d2.boot,d2.junk[d2.junk$pid==ids.boot[i],]) # } # # d2.boot$count<-ifelse(duplicated(d2.boot),2,1) # d2.boot$count<-ifelse(duplicated(d2.boot),3,d2.boot$count) # d2.boot$count<-ifelse(duplicated(d2.boot),4,d2.boot$count) # d2.boot$count<-ifelse(duplicated(d2.boot),5,d2.boot$count) # d2.boot$count<-ifelse(duplicated(d2.boot),6,d2.boot$count) # d2.boot$count<-ifelse(duplicated(d2.boot),7,d2.boot$count) # d2.boot$count<-ifelse(duplicated(d2.boot),8,d2.boot$count) # d2.boot$count<-ifelse(duplicated(d2.boot),9,d2.boot$count) # d2.boot$count<-ifelse(duplicated(d2.boot),10,d2.boot$count) # d2.boot$pid<-paste(d2.boot$pid,count,sep="-") # d2.boot$count<-NULL # stuff # date() set.seed(1) stuff<-date() d2.junk<-data.frame(pid=rep(1:10000,each=20),x=rnorm(200000)) clusters<-unique(d2.junk$pid) units<-sample(clusters,size=length(clusters),replace=T) df.bs<-sapply(units,function(x)which(d2.junk$pid==x)) d2.boot<-d2.junk[unlist(df.bs),] d2.boot$count<-ifelse(duplicated(d2.boot),2,1) d2.boot$count<-ifelse(duplicated(d2.boot),3,d2.boot$count) d2.boot$count<-ifelse(duplicated(d2.boot),4,d2.boot$count) d2.boot$count<-ifelse(duplicated(d2.boot),5,d2.boot$count) d2.boot$count<-ifelse(duplicated(d2.boot),6,d2.boot$count) d2.boot$count<-ifelse(duplicated(d2.boot),7,d2.boot$count) d2.boot$count<-ifelse(duplicated(d2.boot),8,d2.boot$count) d2.boot$count<-ifelse(duplicated(d2.boot),9,d2.boot$count) d2.boot$count<-ifelse(duplicated(d2.boot),10,d2.boot$count) d2.boot$pid<-paste(d2.boot$pid,d2.boot$count,sep="-") #d2.boot$count<-NULL stuff date() library(data.table) set.seed(1) stuff.qi1<-date() d2.junk<-data.frame(pid=rep(1:10000,each=20),x=rnorm(200000)) d2.junk.t <- data.table(d2.junk) setkey(d2.junk.t, pid) all.ids.junk<-unique(d2.junk.t$pid) ids.boot<-sample(all.ids.junk,length(all.ids.junk),replace=TRUE) d2.boot <- do.call("rbind", lapply(ids.boot, FUN=function(id) d2.junk.t[pid==id,])) d2.boot$count <- 1 while(sum(duplicated(d2.boot))){ d2.boot$count<-ifelse(duplicated(d2.boot),d2.boot$count+1,d2.boot$count) } d2.boot$pid<-paste(d2.boot$pid,d2.boot$count,sep="-") d2.boot<- data.frame(d2.boot) d2.boot$count<-NULL stuff.qi2<-date() d2.boot.qi<-d2.boot set.seed(1) stuff.bryan1<-date() d2.junk<-data.frame(pid=rep(1:10000,each=20),x=rnorm(200000)) clusters<-unique(d2.junk$pid) units<-sample(clusters,size=length(clusters),replace=T) df.bs<-sapply(units,function(x)which(d2.junk$pid==x)) d2.boot<-d2.junk[unlist(df.bs),] d2.boot$count <- 1 while(sum(duplicated(d2.boot))){ d2.boot$count<-ifelse(duplicated(d2.boot),d2.boot$count+1,d2.boot$count) } d2.boot$pid<-paste(d2.boot$pid,d2.boot$count,sep="-") d2.boot$count<-NULL stuff.bryan2<-date() d2.boot.bryan<-d2.boot set.seed(1) stuff.thomas1<-date() d2.junk<-data.frame(pid=rep(1:10000,each=20),x=rnorm(200000)) clusters<-unique(d2.junk$pid) units<-sample(clusters,size=length(clusters),replace=T) index.grouped <- split(seq_len(nrow(d2.junk)), d2.junk$pid) index <- unlist(index.grouped[units]) d2.boot <- d2.junk[index, ] d2.boot$count <- 1 while(sum(duplicated(d2.boot))){ d2.boot$count<-ifelse(duplicated(d2.boot),d2.boot$count+1,d2.boot$count) } d2.boot$pid<-paste(d2.boot$pid,d2.boot$count,sep="-") d2.boot$count<-NULL stuff.thomas2<-date() d2.boot.thomas<-d2.boot set.seed(1) stuff.cole1<-date() x<-data.frame(pid=rep(1:10000,each=20),x=rnorm(200000)) ids <- sort(unique(x$pid)) ids.boot <- sample(ids, replace=TRUE) len <- length(ids.boot) cnt <- sapply(seq(len), function(i) { sum(ids.boot[seq(i)] == ids.boot[i]) }) index <- lapply(ids, function(i) { which(x$pid == i) }) x.boot <- lapply(seq_along(ids.boot), function(i) { ix <- ids.boot[i] cbind(pid=sprintf("%s-%s", ix, cnt[i]), x[index[[ix]], -1, drop=FALSE], stringsAsFactors=FALSE) }) x.boot <- do.call(rbind, x.boot) # I like to reset rownames row.names(x.boot) <- NULL stuff.cole2<-date() # # # clusterBootSE<-function(formula,data,clustervar,reps){ # reg1 <- glm(formula, data)#, family=binomial(link="logit")) # #1. generate the bootstrap dataset # clusters<-unique(d2.junk$pid) # #sterrs <- matrix(NA, nrow=reps, ncol=length(coef(reg1))) # for(i in 1:reps){ # units<-sample(clusters,size=length(clusters),replace=T) # df.bs<-sapply(units,function(x)which(d2.junk$pid==x)) # df.bs<-d2.junk[unlist(df.bs),] # sterrs[i,]<-coef(glm(formula, df.bs))#, family=binomial(link="logit"))) # cat("\r",i*100/reps," % done ");flush.console() # } # val <- cbind(coef(reg1),apply(sterrs,2,sd)) # rownames(val)<-names(coef(reg1)); colnames(val) <- c("Estimate","Std. Error") # cat("\n") # return(val) # } # # # # clusterBootSE(x~1,d2.junk,pid,5)