library(geepack)
library(rms)
library(survival)   # Surv/coxph/survfit (also attached as a dependency of rms)
library(data.table)

####### data generation #######

# Event function w/covariate
event2.p <- function(a,b,c,d,e,CD4,TIMEHAART,Xbinary) {
  ZZ <- exp(a*CD4 + b*CD4^2 + c*TIMEHAART + d + e*Xbinary)
  ZZ/(1+ZZ)
}

# Haart0 function w/covariate
haart2.p <- function(CD4,a,b,c,Xbinary) {
  ZZ <- exp(a + b*CD4 + c*Xbinary)
  # if(any(tmp==Inf)) {stop('haart2.p problem'); tmp[tmp==Inf] <- 10000}
  ZZ/(1+ZZ)
}

cd4 <- function(id=id, mon=120, haart0=300, sdratio=0.1, wnoise=TRUE, pre=c(-5.8, 700), post=-0.05,
                event.arg=c(-0.015,1e-5,0.001,-2.2,0), first1=TRUE, haart0.true=TRUE,
                haart.arg=c(0.5,-0.05,0), freq=1, sub='all') {
  # mon: duration of study (months)
  # haart0: CD4 when to start HAART
  # sdratio: 0-1: used to generate noise (pre-HAART)
  # wnoise: generate noisy CD4 trajectory
  # pre: pre-HAART arguments
  # post: post-HAART argument
  # event.arg: event arguments
  # first1: used for random entry, first1=TRUE forces time0=1.
  # haart0.true: assign HAART at first CD4 < haart0
  # haart.arg: haart arguments
  # freq: keep only every 'freq'-th month, e.g., 2=every other month
  # sub: keep subset of the data (sub='pre','post','all')

  # Determine when patient first visits clinic
  # Random entry Uniform(1,mon)
  entry <- ifelse(first1, 1, round(runif(1,1,mon)))
  true.x <- seq(entry, mon+entry-1)
  obs.x <- seq(entry, mon+entry-1, freq)
  n.obs <- length(obs.x)
  n.true <- length(true.x)   # same as mon

  haart <- numeric(n.true)   # Only need for every obs visit, but will restrict later.
  event <- numeric(n.true)   # Calculate event at every month

  # Estimate cd4, using true.x
  # Pre (true), w/wo noise
  pre.cd4 <- round(pre[1]*true.x + pre[2])
  pre.cd4[pre.cd4 < 0] <- 0

  pre.cd4.wn <- round((sqrt(pre.cd4) + rnorm(n.true, mean=0, sd=1))^2)

  # Pre (observed), w/wo noise
  pre.cd4.obs <- pre.cd4[match(obs.x, true.x)]
  pre.cd4.wn.obs <- pre.cd4.wn[match(obs.x, true.x)]

  # Determine when (in terms of CD4) HAART will be initiated. This depends only on the observed
  # cd4 values with noise!!!
  XtoH <- rbinom(1,1,0.25)
  if(!haart0.true) {
    haart.prob <- unlist(apply(X=cbind(pre.cd4.wn.obs,NULL), MARGIN=2, haart2.p,
                               a=haart.arg[1], b=haart.arg[2], c=haart.arg[3], Xbinary=XtoH))
    first.haart <- which(sapply(haart.prob, rbinom, n=1, size=1) > 0)[1]
    if(!is.na(first.haart)) haart[seq(match(obs.x[first.haart], true.x), n.true)] <- 1
  } else {
    first.haart <- which(pre.cd4.wn.obs < haart0)[1]
    if(!is.na(first.haart)) haart[seq(match(obs.x[first.haart], true.x), n.true)] <- 1
  }

  # Identify the true CD4 when HAART is initiated -- this will be used to calculate asymp. CD4.
  cd4haart0 <- ifelse(is.na(first.haart), NA, pre.cd4[match(obs.x[first.haart], true.x)])

  post.cd4 <- post.cd4.wn <- rep(NA, n.true)

  # Post (w/wo noise)
  if(!is.na(cd4haart0)) {
    # Post-HAART cd4 (true) -- note, I am using seq(n.true) instead of true.x since
    # true.x only pertains to pre-treatment.
    # Gras -- paper
    asymp.cd4 <- round(rnorm(1, mean=exp(0.31*log(cd4haart0+50) + 4.8), sd=15))
    if(asymp.cd4 < 0) asymp.cd4 <- 0

    post.cd4 <- round(asymp.cd4 - (asymp.cd4-cd4haart0)*exp(post[1]*seq(n.true)))
    post.cd4[post.cd4 < 0] <- 0

    # Post-HAART cd4 (w/noise)
    post.cd4.wn <- round((sqrt(post.cd4) + rnorm(n.true, mean=0, sd=1))^2)
    post.cd4.wn[post.cd4.wn < 0] <- 0
  }

  # Create true cd4 trajectory.
if(sub=='pre') { cd4.out <- pre.cd4 cd4.out.wn <- pre.cd4.wn haart <- rep(0,n.true) # not needed } if(sub=='post') { cd4.out <- post.cd4 cd4.out.wn <- post.cd4.wn haart <- rep(1,n.true) # not needed } if(sub=='all') { cd4.out <- pre.cd4 cd4.out.wn <- pre.cd4.wn if(any(haart==1)) { # Assume patient visits the clinic at the first of the month, then if eligible for # treatment, then they start treatment right away! cd4.out <- c(pre.cd4[haart==0], pre.cd4[haart==1][1], post.cd4)[seq(n.true)] cd4.out.wn <- c(pre.cd4.wn[haart==0],pre.cd4.wn[haart==1][1], post.cd4.wn)[seq(n.true)] haart <- c(haart[haart==0],1,haart[haart==1][-1]) } } # Update event indicator; use real cd4 ##### I changed Nate's code here because XtoY and XtoH need to be the same to be confounder # X -> Y (binary X) XtoY <- rbinom(1,1,0.25) #XtoY <- XtoH ## unmeasured confounding event.prob <- event2.p(a=event.arg[1],b=event.arg[2],c=event.arg[3], d=event.arg[4],e=event.arg[5], CD4=cd4.out,TIMEHAART=cumsum(haart), Xbinary=rep(XtoY,n.true)) if(any(is.na(event.prob))) event.prob[is.na(event.prob)] <- 0 # CD4[i] predicts EVENT[i+1] event.prob <- c(0,event.prob) event.prob <- event.prob[-length(event.prob)] first.event <- which(sapply(event.prob,rbinom,n=1,size=1) > 0)[1] if(!is.na(first.event)) event[seq(first.event, n.true)] <- 1 # Event may have occurred between visits, thus we should ONLY report info # that occurred before the event + the event. if(any(event==1)) { obs.x2 <- unique(sort(c(obs.x,true.x[first.event]))) # Add first.event (if not already there) obs.x <- obs.x2[obs.x2 <= true.x[first.event] ] } #outs <- data.frame(x=true.x, y=cd4.out, y.wn=cd4.out.wn, event, haart, XtoY, XtoH) # data.frame is extremely slow, matrix is much much better outs <- cbind(id, true.x, cd4.out, cd4.out.wn, event, haart, XtoY, XtoH) colnames(outs) <- c("id", "x", "y", "y.wn", "event", "haart", "XtoY", "XtoH") outs <- outs[match(obs.x, outs[, "x"]),] outs[,"x"] <- outs[, "x"] - (outs[1,"x"] -1) if(outs[dim(outs)[1],"event"]==1) outs[dim(outs)[1],c("y.wn", "haart", "y", "XtoY", "XtoH")] <- NA #attr(outs,'ep') <- cbind(true.x,event,haart,event.prob) outs } ########## cole's approach ################# # K1 <- seq(700, 200, by=-50) # K2 <- seq(600, 100, by=-50) # K3 <- seq(500, 0, by=-50) ##### get information from the original dataset cole.getinfo <- function(Z, k1, k2, k3){ #if(Z[dim(Z)[1],"event"]==1) Z[dim(Z)[1],c("y.wn", "haart", "y", "XtoY", "XtoH")] <- NA id <- Z[1,"id"] cd40 <- Z[1,'y.wn'] fu <- Z[nrow(Z),'x'] ev <- Z[nrow(Z),'event'] ttrt <- Z[Z[,'haart']==1,'x'][1] trt <- !is.na(ttrt) cd4t <- Z[Z[,'haart']==1,'y.wn'][1] tcd4.k1 <- Z[which(Z[, "y.wn"] k2, 0, NA) sum.data$def <- ifelse( sum.data$cd4t<=k2 & sum.data$cd4t > k3, 1, sum.data$def) naive.data <- subset(sum.data, !is.na(def)) naive.data$time.start <- naive.data$ttrt ### in naive analysis, time ends when event or censoring (add half of time between visits to censored patients) ##### according to Lancet 2009: 373: 1352-63, data for patients who remained alive were censored at the patient's last visit, ##### plus 50% of the mean time between visits for each cohort ##### in our simulation, patient visits every 3 months: plus 1.5 naive.data$time.end <- ifelse(naive.data$ev, naive.data$fu, naive.data$fu+1.5) naive.data$time <- naive.data$time.end - naive.data$time.start naive.data <- naive.data[, c("ev", "def", "time")] return(naive.data) } cole.prep.impute2 <- function(pre.data, k1, k2, k3){ sum.pre.data <- cole.summary(pre.data, k1=k1, k2=k2, k3=k3) #### inclusion criteria ##1) CD40>=k1 
& cd4 fall in the higher cd4 range at least once sum.pre.data <- subset(sum.pre.data, cd40>=k1 & !is.na(tcd4.k1)) ### This has been fixed in cd4_9_ep_Qi.r ### 2) drop those who die immediately when their CD4 drop below k1 #sum.pre.data <- sum.pre.data[-which(sum.pre.data$tev==sum.pre.data$tcd4.k1),] lead.time.sub <- subset(sum.pre.data, !is.na(tcd4.k2)) lead.time.sub$lead.time.event <- 1 lead.time.sub$time <- lead.time.sub$tcd4.k2 -lead.time.sub$tcd4.k1 fp.sub <- subset(sum.pre.data, ev & is.na(tcd4.k2)) fp.sub$fp.event <- 1 fp.sub$time <- fp.sub$tev - fp.sub$tcd4.k1 pi <- dim(fp.sub)[1]/(dim(fp.sub)[1] + dim(lead.time.sub)[1]) ##### redistribute censored patient to lead.time group and fast progressor group proportionally censor.sub <- subset(sum.pre.data, ev==0 & is.na(tcd4.k2)) if (dim(censor.sub)[1]>=2) { #### Lancet paper: if censored, add half of mean time between visits censor.sub$fu <- censor.sub$fu+1.5 no.redistr.fp <- sample(1:dim(censor.sub)[1], max(1,round(pi* dim(censor.sub)[1])), replace=FALSE) redistr.fp <- censor.sub[no.redistr.fp, ] redistr.fp$fp.event <- 0 redistr.fp$time <- redistr.fp$fu - redistr.fp$tcd4.k1 redistr.lead.time <- censor.sub[-no.redistr.fp, ] redistr.lead.time$lead.time.event <- 0 redistr.lead.time$time <- redistr.lead.time$fu - redistr.lead.time$tcd4.k1 lead.time.sub <- rbind(lead.time.sub, redistr.lead.time) fp.sub <- rbind(fp.sub, redistr.fp) } #### Kaplan Meier estimates for lead.time and fast progressor lead.time.surv <- survfit(Surv(time, lead.time.event)~1, data=lead.time.sub) lead.time.f <- cbind(summary(lead.time.surv)$surv, summary(lead.time.surv)$time) if (lead.time.f[dim(lead.time.f)[1], 1]==0) lead.time.f <- lead.time.f else lead.time.f <- rbind(lead.time.f, c(0, max(lead.time.sub$time))) fp.surv <- survfit(Surv(time, fp.event)~1, data=fp.sub) fp.f <- cbind(summary(fp.surv)$surv, summary(fp.surv)$time) if (fp.f[dim(fp.f)[1], 1]==0) fp.f <- fp.f else fp.f <- rbind(fp.f, c(0, max(fp.sub$time))) return(list(lead.time.f=lead.time.f, fp.f=fp.f, pi=pi, no.pre.patient=dim(sum.pre.data)[1])) } cole.analysis2 <- function(naive.data, lead.time.f, fp.f, pi, no.pre.patient, no.impute=25){ #m.naive <- coxph(Surv(time, ev) ~ def, data=naive.data) #naive.est <- m.naive$coef #naive.se <- sqrt(m.naive$var) #naive.result <- exp(c(naive.est, naive.est-1.96*naive.se, naive.est+1.96*naive.se)) imp.result <- matrix(rep(NA,25*2), ncol=2) no.impute.fastprogressor <- round( sum(naive.data$def)*pi/(1-pi), 0) for (i in 1:no.impute){ ### imm group: lead.time=0; def group: we sample from kaplan-meier estimates of lead time of preHAART data #no.impute.fastprogressor <- rnbinom(1, size=sum(naive.data$def), 1-pi) impute.lead.time <- sapply(runif(sum(naive.data$def==1)), FUN=function(x) lead.time.f[which(x>=lead.time.f[,1])[1], 2]) adj.data <- naive.data[, c("ev", "def")] adj.data$time.adj[adj.data$def==0] <- naive.data$time[naive.data$def==0] adj.data$time.adj[adj.data$def==1] <- naive.data$time[naive.data$def==1] + impute.lead.time #### impute fastprogressor: sample fastprogressor (with replacement) from the simulated preHAART data if(no.impute.fastprogressor>0){ impute.fp.time <- sapply(runif(no.impute.fastprogressor), FUN=function(x) fp.f[which(x>=fp.f[,1])[1], 2]) impute.fp.data <- cbind(1, 1, impute.fp.time) colnames(impute.fp.data) <- c("ev", "def", "time.adj") ### add imputed fastprogressor adj.data <- rbind(adj.data, impute.fp.data) } m.adj <- coxph(Surv(time.adj, ev)~def, data=adj.data) imp.result[i,] <- c(m.adj$coef, m.adj$var) } r.est <- mean(imp.result[,1]) se.est 
<- sqrt(mean(imp.result[,2]) + (1+1/no.impute)*var(imp.result[,1])) adj.result <- exp(c(r.est, r.est-1.96*se.est, r.est+1.96*se.est)) #no.imm <- sum(naive.data$def==0) #no.def <- sum(naive.data$def) #no.patient <- dim(naive.data)[1] #lead.time.quantiles <- quantile(impute.lead.time) #fp.time.quantiles <- quantile(impute.fp.time) #no.event <- sum(naive.data$ev) # return(list(naive.result=round(naive.result,2), adj.result=round(adj.result,2), # no.pre.patient= no.pre.patient, lead.time.quantiles = lead.time.quantiles, fp.time.quantiles=fp.time.quantiles, pi=round(pi*100,1), # no.patient=no.patient, no.imm=no.imm, no.def=no.def, no.event=no.event, # no.impute.fastprogressor=no.impute.fastprogressor)) # # return(list(adj.result=adj.result)) return(adj.result) } # print.out <- function(result){ # NO.pre.patient <- result$no.pre.patient # leadtime.quantiles <- paste(round(result$lead.time), collapse="-") # fp.time.quantiles <- paste(round(result$fp.time), collapse="-") # pi <- result$pi # NO.patient <- result$no.patient # NO.event <- result$no.event # NO.unseen.event <- result$no.impute.fastprogressor # naive <- paste(result$naive[1], " (", result$naive[2], "-",result$naive[3] , ")", sep="") # adj <- paste(result$adj[1], " (", result$adj[2], "-",result$adj[3] , ")", sep="") # return(cbind(NO.pre.patient, leadtime.quantiles, fp.time.quantiles, pi, NO.patient, NO.event, NO.unseen.event, naive, adj)) # } cole.result2 <- function(all.data, pre.data,K1,K2,K3){ # result.out <- NULL #impute <- vector('list', length(K1)) #result <- vector('list',length(K1)) result <- matrix(NA, nrow=length(K1), ncol=3) colnames(result) <- c("HR", "LB", "UB") rownames(result) <- paste( paste(K3+1, K2, sep="-"), "vs.", paste(K2+1, K1, sep="-") ) for (i in 1:length(K1)){ k1 <- K1[i] k2 <- K2[i] k3 <- K3[i] #higher.range <- paste(k2+1, k1, sep="-") #lower.range <- paste(k3+1, k2, sep="-") naive.data <- try(cole.prep.naive(all.data, k1, k2, k3)) impute.data <- try(cole.prep.impute2(pre.data, k1, k2, k3)) result[i,] <- try(cole.analysis2(naive.data=naive.data, lead.time.f=impute.data$lead.time.f, fp.f=impute.data$fp.f, pi=impute.data$pi, no.pre.patient=impute.data$no.pre.patient, no.impute=25)) #rownames(result)[i] <- paste(higher.range, "vs",lower.range) #impute[[i]] <- impute.data #result.out <- tryCatch(rbind(result.out, cbind(higher.range, lower.range, print.out(result[[i]]))), error=function(e) print(e)) } return(result) } ########### Hernan's approach ################################# #library(geepack) #library(rms) #library(data.table) hernan.getinfo <- function(Z, k1, k2){ #if(Z[dim(Z)[1],"event"]==1) Z[dim(Z)[1],c("y.wn", "haart", "y", "XtoY", "XtoH")] <- NA cd40 <- Z[1,'y.wn'] fu <- Z[nrow(Z),'x'] ev <- Z[nrow(Z),'event'] ttrt <- Z[Z[,'haart']==1,'x'][1] trt <- !is.na(ttrt) cd4t <- Z[Z[,'haart']==1,'y.wn'][1] tcd4.k1 <- Z[which(Z[, "y.wn"] info$tcd4.k1+grace, 1, 0) time.end.k1 <- ifelse(ac.k1, info$tcd4.k1+grace, info$fu) k1.data <- org.data[which(org.data[, "x"]==time.start): max(which(org.data[, "x"]<=time.end.k1)) , c("id", "x", "y.wn", "event","haart")] if (time.start == time.end.k1) { names <- names(k1.data) k1.data <- matrix(k1.data, nrow=1) colnames(k1.data) <- names } def <- 0 ac <- c(rep(0, dim(k1.data)[1]-1), ac.k1) cd4 <- k1.data[, "y.wn"] time <- k1.data[, "x"] - time.start ind.haart <- cumsum(1 - cumprod(1-k1.data[, "haart"])) ind.haart <- ifelse(ind.haart>1, 2, ind.haart) k1.data <- cbind(k1.data, cd4, def, ac, time, ind.haart) ac.k2 <- ifelse( (info$ttrt<= info$tcd4.k2 + grace & info$ttrt >= 
info$tcd4.k2) | (is.na(info$ttrt) & is.na(info$tcd4.k2)) | (is.na(info$ttrt) & info$fu <= info$tcd4.k2 + grace), 0, 1) ac.k2 <- ifelse(is.na(ac.k2), 1, ac.k2) time.end.k2 <- ifelse(!ac.k2, info$fu, NA) time.end.k2 <- ifelse(ac.k2 & ( is.na(info$tcd4.k2) | info$ttrt < info$tcd4.k2 ), info$ttrt, time.end.k2 ) time.end.k2 <- ifelse(ac.k2 & !is.na(info$tcd4.k2) & (info$ttrt > info$tcd4.k2 + grace | is.na(info$ttrt)), info$tcd4.k2 + grace, time.end.k2) k2.data <- org.data[which(org.data[, "x"]==time.start): max(which(org.data[, "x"]<=time.end.k2)) , c("id", "x", "y.wn", "event","haart")] if (time.start == time.end.k2) { names <- names(k2.data) k2.data <- matrix(k2.data, nrow=1) colnames(k2.data) <- names } def <- 1 ac <- c(rep(0, dim(k2.data)[1]-1), ac.k2) cd4 <- k2.data[, "y.wn"] time <- k2.data[,"x"] -time.start ind.haart <- cumsum(1- cumprod(1-k2.data[, "haart"])) ind.haart <- ifelse(ind.haart>1, 2, ind.haart) k2.data <- cbind(k2.data, cd4, def, ac, time, ind.haart) pt.data <- rbind(k1.data, k2.data) pt.data[which(pt.data[,"event"]==1),"ac"] <- NA pt.data[, "time"] <- ceiling(pt.data[,"time"]/3)*3 ind.k2g <- ifelse(pt.data[, "x"] >= info$tcd4.k2 & pt.data[, "x"] < info$tcd4.k2 +grace, 1, 0) pt.data <- cbind(pt.data, ind.k2g) return(pt.data[ , c("id", "time", "cd4", "ac", "def", "event", "haart", "ind.haart", "ind.k2g")]) } else return(NULL) } hernan.analysis.grace <- function(pt.data, k1, k2, nk=10, grace=3, trunc=TRUE){ # n.total <- length(unique(pt.data[, "id"])) # n.ac.imm <- length(unique(subset(pt.data, pt.data[, "ac"]==1& pt.data[,"def"]==0)[,"id"])) # n.ac.def <- length(unique(subset(pt.data, pt.data[,"ac"]==1& pt.data[, "def"]==1)[,"id"])) # n.event.def <- length(unique(subset(pt.data, pt.data[, "event"]==1 & pt.data[, "def"]==1)[, "id"])) # n.event.imm <- length(unique(subset(pt.data, pt.data[, "event"]==1 & pt.data[, "def"]==0)[, "id"])) m1 <- lrm(haart~rcs(time, nk) + cd4, data=as.data.frame(unique( pt.data[which(pt.data[,"ind.haart"]<2), c("id","time", "cd4", "haart")]) ), tol=1e-9) p1 <- NA pt.data <- cbind(pt.data, p1) pt.data[which(pt.data[,"ind.haart"]<2), "p1"] <- 1/(1+exp(predict(m1, newdata=pt.data[ which(pt.data[, "ind.haart"]<2) , c("time", "cd4")] ))) pt.data[, "p1"] <- ifelse(pt.data[, "haart"], 1-pt.data[, "p1"], pt.data[, "p1"]) pt.data[which(pt.data[, "ind.haart"]==2), "p1"] <- 1 ### for immediate group, no one can be censored before time=grace pt.data[which(pt.data[, "def"]==0 & pt.data[, "time"]grace no one could be censored too # pt.data[which(pt.data[, "def"]==0 & !is.na(pt.data[, "ac"]) & pt.data[, "time"]grace), "p2"] <- 1 # # #### deferred group (t>0) # m2 <- glm(ac~rcs(time, nk), data=as.data.frame(subset(pt.data, pt.data[, "def"]==1 & !is.na(pt.data[, "ac"]))), family=binomial(link="logit")) # #m2 <- glm(ac~rcs(time, nk), data=subset(pt.data, def==1 & !is.na(ac)), family=binomial(link="logit")) # # pt.data[which(pt.data[, "def"]==1 &!is.na(pt.data[, "ac"])), "p2"] <- 1/(1+exp(predict(m2))) # w <- sw <- NA pt.data <- cbind(pt.data, w, sw) ##### data.table is much much quicker for inquiry! 
data <- data.table(pt.data[, c("id", "def","time", "p1", "p2")]) setkey(data, id, def) pt.data[pt.data[, "def"]==1, "w"] <- do.call("c", lapply(unique(data[def==1, id]), FUN=function(i) { wts <- cumprod(c(1, data[J(i, 1)]$p1)) wts <- 1/wts[-length(wts)]} ) ) pt.data[pt.data[, "def"]==0, "w"] <- do.call("c", lapply(unique(data[def==0, id]), FUN=function(i) { wts <- cumprod(c(1, data[J(i, 0)]$p1)) wts <- 1/wts[-length(wts)]} ) ) # pt.data[pt.data[, "def"]==1, "sw"] <- pt.data[pt.data[, "def"]==1,"w"] * # do.call("c", lapply(unique(data[def==1, id]), FUN=function(i) { # wts <- cumprod(c(1, data[J(i, 1)]$p2)) # wts <- wts[-length(wts)]} ) ) # # pt.data[pt.data[, "def"]==0, "sw"] <- pt.data[pt.data[, "def"]==0,"w"] * # do.call("c", lapply(unique(data[def==0, id]), FUN=function(i) { # wts <- cumprod(c(1, data[J(i, 0)]$p2)) # wts <- wts[-length(wts)]} ) ) #### stablized weight numerator w.quantiles <- quantile(pt.data[, "w"]) #sw.quantiles <- quantile(pt.data[, "sw"]) #w.quantiles <- quantile(pt.data$w) #sw.quantiles <- quantile(pt.data$sw) if (trunc){ w.trunc <- quantile(pt.data[, "w"], c(0.01, 0.99)) #sw.trunc <- quantile(pt.data[, "sw"], c(0.01, 0.99)) pt.data[, "w"] <- ifelse(pt.data[, "w"] < w.trunc[1], w.trunc[1], pt.data[, "w"]) pt.data[, "w"] <- ifelse(pt.data[, "w"] > w.trunc[2], w.trunc[2], pt.data[, "w"]) #pt.data[, "sw"] <- ifelse(pt.data[, "sw"] < sw.trunc[1], sw.trunc[1], pt.data[, "sw"]) #pt.data[, "sw"] <- ifelse(pt.data[, "sw"] > sw.trunc[2], sw.trunc[2], pt.data[, "sw"]) } #pt.data <- subset(pt.data, time>0) pt.data <- subset(pt.data, pt.data[,"time"]>0) #m.naive <- geeglm(event~ rcs(time, nk) + def, data=as.data.frame(pt.data), id=id, corstr = "independence", family=binomial(link="logit")) m.w <- geeglm(event~ rcs(time, nk) + def, data=as.data.frame(pt.data), id=id, corstr = "independence", weights=pt.data[, "w"], family=binomial(link="logit")) #m.sw <- geeglm(event~ rcs(time, nk) + def, data=as.data.frame(pt.data), id=id, corstr = "independence", weights=pt.data[, "sw"], family=binomial(link="logit")) #s.naive <- summary(m.naive) s.w <- summary(m.w) #s.sw <- summary(m.sw) # est <- c(s.naive$coef["def", 1], s.w$coef["def", 1], s.sw$coef["def", 1] ) # se <- c(s.naive$coef["def", 2], s.w$coef["def", 2], s.sw$coef["def", 2] ) # result <- exp(cbind(est, est-1.96*se, est+1.96*se)) # rownames(result) <- c("naive", "weights", "stabilized weights") # colnames(result) <- c("OR", "LB", "UB") est <- s.w$coef["def", 1] se <- s.w$coef["def", 2] result <- exp(cbind(est, est-1.96*se, est+1.96*se)) names(result) <- c("OR", "LB", "UB") #rownames(result) <- c("naive", "weights", "stabilized weights") #colnames(result) <- c("OR", "LB", "UB") return(result) # return(list(k1=k1, k2=k2, result=result, n.total=n.total, # n.ac.imm=n.ac.imm, n.ac.def=n.ac.def, # n.event.def=n.event.def, n.event.imm=n.event.imm, # w.quantiles=w.quantiles, sw.quantiles=sw.quantiles)) } speed.hernan.grace.result <- function(K1, K2, all.data, cd40=TRUE, nk=10, grace=3, trunc=TRUE){ result <- matrix(NA, nrow=length(K1), ncol=3) colnames(result) <- c("HR", "LB", "UB") rownames(result) <- paste(K2, "vs.", K1) for (i in 1:length(K1) ){ k1 <- K1[i] k2 <- K2[i] pt.data <- NULL pt.data <- try(do.call("rbind", lapply(all.data, FUN=function(org.data) {gen.pt.grace(org.data=org.data, k1=k1,k2=k2, cd40=cd40, grace=grace)}))) result[i, ] <- try(hernan.analysis.grace(pt.data, k1=k1, k2=k2, nk=nk, grace=grace, trunc=trunc)) } return(result) } ######## simulation n=15000, sim=500 ###### source("when2start_functions.R") ####### scenario 
I: optimal CD4 =350 nn <- 15000 #nn <- 1000 sim <- 500 #sim <- 10 set.seed(1) sim.seeds <- unique(round(runif(sim*10,0,10)*sim,0))[seq(sim)] haartargs <- c(1,-0.01,0) start.at.1 <- TRUE # 3 'ideal' conditions are defined below using the event.arg argument. event.arg.700 <- c(-0.007,1e-06,0,-3.1,0) event.arg.500 <- c(-0.007,1e-06,0.03,-3.5,0) event.arg.350 <- c(-0.007,1e-06,0.05,-4.1,0) event.arg.truth <- event.arg.350 K1 <- seq(700, 200, by=-50) K2 <- seq(600, 100, by=-50) K3 <- seq(500, 0, by=-50) # K1 <- seq(600, 200, by=-100) # K2 <- seq(500, 100, by=-100) # K3 <- seq(400, 0, by=-100) nk = 10 cd40=FALSE grace=6 trunc=TRUE cole_result <- vector('list', sim) hernan_result <- vector('list', sim) Sys.time() for (i in 1:sim){ #### ##### generate data ########## pre.data <- all.data <- vector('list',nn) set.seed(sim.seeds[i]) seeds <- unique(round(runif(nn*10,0,10)*nn,0))[seq(nn)] for(ii in seq(nn)) { set.seed(seeds[ii]) pre.data[[ii]] <- try(cd4(id=ii, mon = 120, haart0 = 350, sdratio = 0.1, wnoise = TRUE, pre = c(round(rnorm(1,mean=-5.8,sd=0.75),2),round(rnorm(1,mean=700,sd=50),0)), post = round(rnorm(1,mean=-0.05,sd=0.01),2), event.arg = event.arg.truth, first1 = start.at.1, haart0.true = FALSE, haart.arg = haartargs, freq = 3, sub = "pre")) set.seed(seeds[ii]) all.data[[ii]] <- try(cd4(id=ii, mon = 120, haart0 = 350, sdratio = 0.1, wnoise = TRUE, pre = c(round(rnorm(1,mean=-5.8,sd=0.75),2),round(rnorm(1,mean=700,sd=50),0)), post = round(rnorm(1,mean=-0.05,sd=0.01),2), event.arg = event.arg.truth, first1 = start.at.1, haart0.true = FALSE, haart.arg = haartargs, freq = 3, sub = "all")) } cole_result[[i]] <- try(cole.result2(all.data, pre.data, K1, K2, K3)) hernan_result[[i]] <- try(speed.hernan.grace.result(K1=K1, K2=K2, all.data=all.data, cd40=cd40, nk=nk, grace=grace, trunc=trunc)) cat("sim", i, "finished at") print(Sys.time()) } save(cole_result, hernan_result, file="sim_350.RData") ######## Scenario II: optimal CD4=500 source("when2start_functions.R") nn <- 15000 #nn <- 1000 #sim <- 2 sim <- 500 set.seed(1) sim.seeds <- unique(round(runif(sim*10,0,10)*sim,0))[seq(sim)] haartargs <- c(1,-0.01,0) start.at.1 <- TRUE # 3 'ideal' conditions are defined below using the event.arg argument. 
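## Illustrative aside (not used by the simulation): the three event.arg settings differ only in
## the coefficient on months since starting HAART (3rd element) and the intercept (4th element).
## The check below evaluates event2.p, defined at the top of this file, at a fixed CD4 of 350
## with 0 vs. 24 months on HAART; the literal coefficient vectors simply repeat the event.arg.*
## values defined in this script.
event.arg.check <- rbind("optimal 700" = c(-0.007, 1e-06, 0,    -3.1, 0),
                         "optimal 500" = c(-0.007, 1e-06, 0.03, -3.5, 0),
                         "optimal 350" = c(-0.007, 1e-06, 0.05, -4.1, 0))
round(t(apply(event.arg.check, 1, function(p)
  c(p.event.no.haart  = event2.p(p[1], p[2], p[3], p[4], p[5], CD4=350, TIMEHAART=0,  Xbinary=0),
    p.event.24m.haart = event2.p(p[1], p[2], p[3], p[4], p[5], CD4=350, TIMEHAART=24, Xbinary=0)))), 5)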
event.arg.700 <- c(-0.007,1e-06,0,-3.1,0) event.arg.500 <- c(-0.007,1e-06,0.03,-3.5,0) event.arg.350 <- c(-0.007,1e-06,0.05,-4.1,0) event.arg.truth <- event.arg.500 K1 <- seq(700, 200, by=-50) K2 <- seq(600, 100, by=-50) K3 <- seq(500, 0, by=-50) # K1 <- seq(600, 200, by=-100) # K2 <- seq(500, 100, by=-100) # K3 <- seq(400, 0, by=-100) nk = 10 cd40=FALSE grace=6 trunc=TRUE cole_result <- vector('list', sim) hernan_result <- vector('list', sim) Sys.time() for (i in 1:sim){ #### ##### generate data ########## pre.data <- all.data <- vector('list',nn) set.seed(sim.seeds[i]) seeds <- unique(round(runif(nn*10,0,10)*nn,0))[seq(nn)] for(ii in seq(nn)) { set.seed(seeds[ii]) pre.data[[ii]] <- try(cd4(id=ii, mon = 120, haart0 = 500, sdratio = 0.1, wnoise = TRUE, pre = c(round(rnorm(1,mean=-5.8,sd=0.75),2),round(rnorm(1,mean=700,sd=50),0)), post = round(rnorm(1,mean=-0.05,sd=0.01),2), event.arg = event.arg.truth, first1 = start.at.1, haart0.true = FALSE, haart.arg = haartargs, freq = 3, sub = "pre")) set.seed(seeds[ii]) all.data[[ii]] <- try(cd4(id=ii, mon = 120, haart0 = 500, sdratio = 0.1, wnoise = TRUE, pre = c(round(rnorm(1,mean=-5.8,sd=0.75),2),round(rnorm(1,mean=700,sd=50),0)), post = round(rnorm(1,mean=-0.05,sd=0.01),2), event.arg = event.arg.truth, first1 = start.at.1, haart0.true = FALSE, haart.arg = haartargs, freq = 3, sub = "all")) } cole_result[[i]] <- try(cole.result2(all.data, pre.data, K1, K2, K3)) hernan_result[[i]] <- try(speed.hernan.grace.result(K1=K1, K2=K2, all.data=all.data, cd40=cd40, nk=nk, grace=grace, trunc=trunc)) cat("sim", i, "finished at") print(Sys.time()) } save(cole_result, hernan_result, file="sim_500.RData") ##### Scenario III: optimal CD4=700 source("when2start_functions.R") nn <- 15000 #nn <- 1000 #sim <- 10 sim <- 500 set.seed(1) sim.seeds <- unique(round(runif(sim*10,0,10)*sim,0))[seq(sim)] haartargs <- c(1,-0.01,0) start.at.1 <- TRUE # 3 'ideal' conditions are defined below using the event.arg argument. 
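## Minimal sketch (illustration only) of the multiple-imputation pooling used in cole.analysis2
## above: the pooled log HR is the mean of the per-imputation log HRs, and its variance is the
## mean within-imputation variance plus (1 + 1/m) times the between-imputation variance
## (Rubin's rules). 'pool.rubin' is a helper introduced here only for illustration.
pool.rubin <- function(est, var.within) {
  m <- length(est)
  qbar <- mean(est)                                   # pooled log HR
  tvar <- mean(var.within) + (1 + 1/m) * var(est)     # within + (1 + 1/m) * between
  c(HR = exp(qbar), LB = exp(qbar - 1.96*sqrt(tvar)), UB = exp(qbar + 1.96*sqrt(tvar)))
}
## e.g. pool.rubin(imp.result[,1], imp.result[,2]) would reproduce adj.result inside cole.analysis2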
event.arg.700 <- c(-0.007,1e-06,0,-3.1,0) event.arg.500 <- c(-0.007,1e-06,0.03,-3.5,0) event.arg.350 <- c(-0.007,1e-06,0.05,-4.1,0) event.arg.truth <- event.arg.700 K1 <- seq(700, 200, by=-50) K2 <- seq(600, 100, by=-50) K3 <- seq(500, 0, by=-50) # K1 <- seq(600, 200, by=-100) # K2 <- seq(500, 100, by=-100) # K3 <- seq(400, 0, by=-100) nk = 10 cd40=FALSE grace=6 trunc=TRUE cole_result <- vector('list', sim) hernan_result <- vector('list', sim) Sys.time() for (i in 1:sim){ #### ##### generate data ########## pre.data <- all.data <- vector('list',nn) set.seed(sim.seeds[i]) seeds <- unique(round(runif(nn*10,0,10)*nn,0))[seq(nn)] for(ii in seq(nn)) { set.seed(seeds[ii]) pre.data[[ii]] <- try(cd4(id=ii, mon = 120, haart0 = 700, sdratio = 0.1, wnoise = TRUE, pre = c(round(rnorm(1,mean=-5.8,sd=0.75),2),round(rnorm(1,mean=700,sd=50),0)), post = round(rnorm(1,mean=-0.05,sd=0.01),2), event.arg = event.arg.truth, first1 = start.at.1, haart0.true = FALSE, haart.arg = haartargs, freq = 3, sub = "pre")) set.seed(seeds[ii]) all.data[[ii]] <- try(cd4(id=ii, mon = 120, haart0 = 700, sdratio = 0.1, wnoise = TRUE, pre = c(round(rnorm(1,mean=-5.8,sd=0.75),2),round(rnorm(1,mean=700,sd=50),0)), post = round(rnorm(1,mean=-0.05,sd=0.01),2), event.arg = event.arg.truth, first1 = start.at.1, haart0.true = FALSE, haart.arg = haartargs, freq = 3, sub = "all")) } cole_result[[i]] <- try(cole.result2(all.data, pre.data, K1, K2, K3)) hernan_result[[i]] <- try(speed.hernan.grace.result(K1=K1, K2=K2, all.data=all.data, cd40=cd40, nk=nk, grace=grace, trunc=trunc)) cat("sim", i, "finished at") print(Sys.time()) } save(cole_result, hernan_result, file="sim_700.RData") ##### save the results of simulation I load("sim_350.RData") cole_result_350 <- cole_result hernan_result_350 <- hernan_result load("sim_500.RData") cole_result_500 <- cole_result hernan_result_500 <- hernan_result load("sim_700.RData") cole_result_700 <- cole_result hernan_result_700 <- hernan_result # cole_result_700[[143]] <- NULL # hernan_result_350[[1]] <- NULL # hernan_result_350[[67]] <- NULL # hernan_result_700[[342]] <- NULL # hernan_result_700[[415]] <- NULL # hernan_result_700[[434]] <- NULL save(cole_result_350, cole_result_500, cole_result_700, hernan_result_350, hernan_result_500, hernan_result_700, file="when2start_add_sim500_new.RData") ############ Simulation II: n= 500000, sim=1; mimic large sample truth ### Scenario I: optimal CD4=350 nn <- 500000 #nn <- 1000 sim <- 1 #sim <- 10 set.seed(1) sim.seeds <- unique(round(runif(sim*10,0,10)*sim,0))[seq(sim)] haartargs <- c(1,-0.01,0) start.at.1 <- TRUE # 3 'ideal' conditions are defined below using the event.arg argument. 
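## Minimal sketch (illustration only) of the unstabilized weights built in hernan.analysis.grace
## above: p1 holds the fitted probability of the subject's observed HAART status at each visit
## (set to 1 once HAART is under way), and the weight at a visit is the inverse of the cumulative
## product of p1 over the preceding visits (the c(1, ...) / drop-last construction lags the
## product by one visit). 'p1.demo' is a hypothetical vector of per-visit probabilities for one subject.
p1.demo <- c(0.9, 0.8, 0.95, 0.7)
w.demo <- {
  wts <- cumprod(c(1, p1.demo))
  1/wts[-length(wts)]
}
w.demo   # 1.000 1.111 1.389 1.462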
event.arg.700 <- c(-0.007,1e-06,0,-3.1,0) event.arg.500 <- c(-0.007,1e-06,0.03,-3.5,0) event.arg.350 <- c(-0.007,1e-06,0.05,-4.1,0) event.arg.truth <- event.arg.350 K1 <- seq(700, 200, by=-50) K2 <- seq(600, 100, by=-50) K3 <- seq(500, 0, by=-50) # K1 <- seq(600, 200, by=-100) # K2 <- seq(500, 100, by=-100) # K3 <- seq(400, 0, by=-100) nk = 10 cd40=FALSE grace=6 trunc=TRUE cole_result <- vector('list', sim) hernan_result <- vector('list', sim) Sys.time() for (i in 1:sim){ #### ##### generate data ########## pre.data <- all.data <- vector('list',nn) set.seed(sim.seeds[i]) seeds <- unique(round(runif(nn*10,0,10)*nn,0))[seq(nn)] for(ii in seq(nn)) { set.seed(seeds[ii]) pre.data[[ii]] <- try(cd4(id=ii, mon = 120, haart0 = 350, sdratio = 0.1, wnoise = TRUE, pre = c(round(rnorm(1,mean=-5.8,sd=0.75),2),round(rnorm(1,mean=700,sd=50),0)), post = round(rnorm(1,mean=-0.05,sd=0.01),2), event.arg = event.arg.truth, first1 = start.at.1, haart0.true = FALSE, haart.arg = haartargs, freq = 3, sub = "pre")) set.seed(seeds[ii]) all.data[[ii]] <- try(cd4(id=ii, mon = 120, haart0 = 350, sdratio = 0.1, wnoise = TRUE, pre = c(round(rnorm(1,mean=-5.8,sd=0.75),2),round(rnorm(1,mean=700,sd=50),0)), post = round(rnorm(1,mean=-0.05,sd=0.01),2), event.arg = event.arg.truth, first1 = start.at.1, haart0.true = FALSE, haart.arg = haartargs, freq = 3, sub = "all")) } cole_result[[i]] <- try(cole.result2(all.data, pre.data, K1, K2, K3)) hernan_result[[i]] <- try(speed.hernan.grace.result(K1=K1, K2=K2, all.data=all.data, cd40=cd40, nk=nk, grace=grace, trunc=trunc)) cat("sim", i, "finished at") print(Sys.time()) } save(cole_result, hernan_result, file="sim_350.RData") ###### scenario II: optimal CD4=500 nn <- 500000 #nn <- 1000 sim <- 1 set.seed(1) sim.seeds <- unique(round(runif(sim*10,0,10)*sim,0))[seq(sim)] haartargs <- c(1,-0.01,0) start.at.1 <- TRUE # 3 'ideal' conditions are defined below using the event.arg argument. 
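## Minimal sketch (illustration only) of the imputation draw used in cole.analysis2 above: lead
## times (and fast-progressor times) are sampled by inverting a Kaplan-Meier curve stored as a
## (survival, time) matrix -- draw u ~ Uniform(0,1) and return the first time at which the
## survival estimate has fallen to u or below. 'km.demo' is a hypothetical curve in the same
## layout as lead.time.f / fp.f.
km.demo <- cbind(surv = c(0.8, 0.5, 0.2, 0), time = c(3, 6, 9, 12))
sample.km <- function(u, km) km[which(u >= km[, 1])[1], 2]
sapply(c(0.9, 0.6, 0.1), sample.km, km = km.demo)   # returns 3 6 12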
event.arg.700 <- c(-0.007,1e-06,0,-3.1,0) event.arg.500 <- c(-0.007,1e-06,0.03,-3.5,0) event.arg.350 <- c(-0.007,1e-06,0.05,-4.1,0) event.arg.truth <- event.arg.500 K1 <- seq(700, 200, by=-50) K2 <- seq(600, 100, by=-50) K3 <- seq(500, 0, by=-50) # K1 <- seq(600, 200, by=-100) # K2 <- seq(500, 100, by=-100) # K3 <- seq(400, 0, by=-100) nk = 10 cd40=FALSE grace=6 trunc=TRUE cole_result <- vector('list', sim) hernan_result <- vector('list', sim) Sys.time() for (i in 1:sim){ #### ##### generate data ########## pre.data <- all.data <- vector('list',nn) set.seed(sim.seeds[i]) seeds <- unique(round(runif(nn*10,0,10)*nn,0))[seq(nn)] for(ii in seq(nn)) { set.seed(seeds[ii]) pre.data[[ii]] <- try(cd4(id=ii, mon = 120, haart0 = 500, sdratio = 0.1, wnoise = TRUE, pre = c(round(rnorm(1,mean=-5.8,sd=0.75),2),round(rnorm(1,mean=700,sd=50),0)), post = round(rnorm(1,mean=-0.05,sd=0.01),2), event.arg = event.arg.truth, first1 = start.at.1, haart0.true = FALSE, haart.arg = haartargs, freq = 3, sub = "pre")) set.seed(seeds[ii]) all.data[[ii]] <- try(cd4(id=ii, mon = 120, haart0 = 500, sdratio = 0.1, wnoise = TRUE, pre = c(round(rnorm(1,mean=-5.8,sd=0.75),2),round(rnorm(1,mean=700,sd=50),0)), post = round(rnorm(1,mean=-0.05,sd=0.01),2), event.arg = event.arg.truth, first1 = start.at.1, haart0.true = FALSE, haart.arg = haartargs, freq = 3, sub = "all")) } cole_result[[i]] <- try(cole.result2(all.data, pre.data, K1, K2, K3)) hernan_result[[i]] <- try(speed.hernan.grace.result(K1=K1, K2=K2, all.data=all.data, cd40=cd40, nk=nk, grace=grace, trunc=trunc)) cat("sim", i, "finished at") print(Sys.time()) } save(cole_result, hernan_result, file="sim_500.RData") #### scenario III: optimal CD4 =700 nn <- 500000 #nn <- 1000 sim <- 1 set.seed(1) sim.seeds <- unique(round(runif(sim*10,0,10)*sim,0))[seq(sim)] haartargs <- c(1,-0.01,0) start.at.1 <- TRUE # 3 'ideal' conditions are defined below using the event.arg argument. 
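## Minimal sketch (illustration only) of the person-time bookkeeping in gen.pt.grace above:
## cumsum(1 - cumprod(1 - haart)) counts visits since HAART started, and capping the count at 2
## labels each record as 0 = pre-HAART, 1 = the visit at which HAART begins, 2 = later visits on
## HAART (only records with ind.haart < 2 enter the treatment model in hernan.analysis.grace).
## 'haart.demo' is a hypothetical visit-level treatment indicator.
haart.demo <- c(0, 0, 1, 1, 1)
ind.haart.demo <- cumsum(1 - cumprod(1 - haart.demo))
ifelse(ind.haart.demo > 1, 2, ind.haart.demo)   # 0 0 1 2 2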
event.arg.700 <- c(-0.007,1e-06,0,-3.1,0) event.arg.500 <- c(-0.007,1e-06,0.03,-3.5,0) event.arg.350 <- c(-0.007,1e-06,0.05,-4.1,0) event.arg.truth <- event.arg.700 K1 <- seq(700, 200, by=-50) K2 <- seq(600, 100, by=-50) K3 <- seq(500, 0, by=-50) # K1 <- seq(600, 200, by=-100) # K2 <- seq(500, 100, by=-100) # K3 <- seq(400, 0, by=-100) nk = 10 cd40=FALSE grace=6 trunc=TRUE cole_result <- vector('list', sim) hernan_result <- vector('list', sim) Sys.time() for (i in 1:sim){ #### ##### generate data ########## pre.data <- all.data <- vector('list',nn) set.seed(sim.seeds[i]) seeds <- unique(round(runif(nn*10,0,10)*nn,0))[seq(nn)] for(ii in seq(nn)) { set.seed(seeds[ii]) pre.data[[ii]] <- try(cd4(id=ii, mon = 120, haart0 = 700, sdratio = 0.1, wnoise = TRUE, pre = c(round(rnorm(1,mean=-5.8,sd=0.75),2),round(rnorm(1,mean=700,sd=50),0)), post = round(rnorm(1,mean=-0.05,sd=0.01),2), event.arg = event.arg.truth, first1 = start.at.1, haart0.true = FALSE, haart.arg = haartargs, freq = 3, sub = "pre")) set.seed(seeds[ii]) all.data[[ii]] <- try(cd4(id=ii, mon = 120, haart0 = 700, sdratio = 0.1, wnoise = TRUE, pre = c(round(rnorm(1,mean=-5.8,sd=0.75),2),round(rnorm(1,mean=700,sd=50),0)), post = round(rnorm(1,mean=-0.05,sd=0.01),2), event.arg = event.arg.truth, first1 = start.at.1, haart0.true = FALSE, haart.arg = haartargs, freq = 3, sub = "all")) } cole_result[[i]] <- try(cole.result2(all.data, pre.data, K1, K2, K3)) hernan_result[[i]] <- try(speed.hernan.grace.result(K1=K1, K2=K2, all.data=all.data, cd40=cd40, nk=nk, grace=grace, trunc=trunc)) cat("sim", i, "finished at") print(Sys.time()) } save(cole_result, hernan_result, file="sim_700.RData") ##### save the results of simulation II load("sim_350.RData") cole_result_350 <- cole_result hernan_result_350 <- hernan_result load("sim_500.RData") cole_result_500 <- cole_result hernan_result_500 <- hernan_result load("sim_700.RData") cole_result_700 <- cole_result hernan_result_700 <- hernan_result save(cole_result_350, cole_result_500, cole_result_700, hernan_result_350, hernan_result_500, hernan_result_700, file="when2start_add_n500000.RData") ############################ Figure 6 ################################ # # # Event function w/covariate # event2.p <- function(a,b,c,d,e,CD4,TIMEHAART,Xbinary) { # ZZ <- exp(a*CD4 + b*CD4^2 + c*TIMEHAART + d + e*Xbinary) # ZZ/(1+ZZ) # } # # # Haart0 function w/covariate # haart2.p <- function(CD4,a,b,c,Xbinary) { # ZZ <- exp(a + b*CD4 + c*Xbinary) # # if(any(tmp==Inf)) {stop('haart2.p problem'); tmp[tmp==Inf] <- 10000} # ZZ/(1+ZZ) # } # # cd4 <- function(mon=120, haart0=300, sdratio=0.1, wnoise=TRUE,pre=c(-5.8, 700), post=-0.05, # event.arg=c(-0.015,1e-5,0.001,-2.2,0),first1=TRUE,haart0.true=TRUE, # haart.arg=c(0.5,-0.05,0),freq=1,sub='all') { # # mon: duration of study (months) # # haart0: CD4 when to start HAART # # sdratio: 0-1: used to generate noise (pre-HAART) # # wnoise: generate noisy CD4 trajectory # # pre: pre-HAART arguments # # post: post-HAART argument # # event: event arguments # # first1: used for random entry, first1=TRUE forces time0=1. 
# # haart0.true: assign HAART at first CD4 < HAART0 # # haart.arg: haart arguments # # freq: keep only every 'freq'-th month, e.g., 2=every other month # # sub: keep subset of the data (sub='pre','post','all') # # # Determine when patient first visits clinic # # Random entry Uniform(1,mon) # entry <- ifelse(first1,1,round(runif(1,1,mon))) # true.x <- seq(entry,mon+entry-1) # obs.x <- seq(entry,mon+entry-1,freq) # n.obs <- length(obs.x) # n.true <- length(true.x) # same as mon # # haart <- numeric(n.true) # Only need for every obs visit, but will restrict later. # event <- numeric(n.true) # Calculate event at every month # # # Estimate cd4, using true.x # # Pre (true), w/wo noise # pre.cd4 <- round(pre[1]*true.x + pre[2]) # pre.cd4[pre.cd4 < 0] <- 0 # # pre.cd4.wn <- round((sqrt(pre.cd4) + rnorm(n.true,mean=0,sd=1))^2) # # # Pre (observed), w/wo noise # pre.cd4.obs <- pre.cd4[match(obs.x,true.x)] # pre.cd4.wn.obs <- pre.cd4.wn[match(obs.x,true.x)] # # # Determine when (in terms of CD4) HAART will be initiated. This depends only on the observed # # cd4 values with noise!!! # XtoH <- rbinom(1,1,0.25) # if(!haart0.true) { # haart.prob <- unlist(apply(X=cbind(pre.cd4.wn.obs,NULL),MARGIN=2,haart2.p,a=haart.arg[1],b=haart.arg[2],c=haart.arg[3],Xbinary=XtoH)) # first.haart <- which(sapply(haart.prob,rbinom,n=1,size=1) > 0)[1] # if(!is.na(first.haart)) haart[seq(match(obs.x[first.haart],true.x), n.true)] <- 1 # } else { # first.haart <- which(pre.cd4.wn.obs < haart0)[1] # if(!is.na(first.haart)) haart[seq(match(obs.x[first.haart],true.x), n.true)] <- 1 # } # # # Identify the true CD4 when HAART is initiated -- this will be used to calculate asymp. CD4. # cd4haart0 <- ifelse(is.na(first.haart),NA,pre.cd4[match(obs.x[first.haart],true.x)]) # # post.cd4 <- post.cd4.wn <- rep(NA,n.true) # # # Post (w/wo noise) # if(!is.na(cd4haart0)) { # # Post-HARRT cd4 (true) -- note, I am using seq(n.true) instead of x.true since # # it only pertains to pre-treatment. # # Gras -- paper # asymp.cd4 <- round(rnorm(1,mean=exp(0.31*log(cd4haart0+50) + 4.8),sd=15)) # if(asymp.cd4 < 0) asymp.cd4 <- 0 # # post.cd4 <- round(asymp.cd4-(asymp.cd4-cd4haart0)*exp(post[1]*seq(n.true))) # post.cd4[post.cd4 < 0] <- 0 # # # Post-HARRT cd4 (w/noise) # post.cd4.wn <- round((sqrt(post.cd4) + rnorm(n.true,mean=0,sd=1))^2) # post.cd4.wn[post.cd4.wn < 0] <- 0 # } # # # Create true cd4 trajectory. # if(sub=='pre') { # cd4.out <- pre.cd4 # cd4.out.wn <- pre.cd4.wn # haart <- rep(0,n.true) # not needed # } # # if(sub=='post') { # cd4.out <- post.cd4 # cd4.out.wn <- post.cd4.wn # haart <- rep(1,n.true) # not needed # } # # if(sub=='all') { # cd4.out <- pre.cd4 # cd4.out.wn <- pre.cd4.wn # # if(any(haart==1)) { # # Assume patient visits the clinic at the first of the month, then if eligible for # # treatment, then they start treatment right away! 
# cd4.out <- c(pre.cd4[haart==0], pre.cd4[haart==1][1], post.cd4)[seq(n.true)] # cd4.out.wn <- c(pre.cd4.wn[haart==0],pre.cd4.wn[haart==1][1], post.cd4.wn)[seq(n.true)] # haart <- c(haart[haart==0],1,haart[haart==1][-1]) # } # } # # # Update event indicator; use real cd4 # # X -> Y (binary X) # XtoY <- rbinom(1,1,0.25) # event.prob <- event2.p(a=event.arg[1],b=event.arg[2],c=event.arg[3], # d=event.arg[4],e=event.arg[5], # CD4=cd4.out,TIMEHAART=cumsum(haart), # Xbinary=rep(XtoY,n.true)) # if(any(is.na(event.prob))) event.prob[is.na(event.prob)] <- 0 # # # CD4[i] predicts EVENT[i+1] # event.prob <- c(0,event.prob) # event.prob <- event.prob[-length(event.prob)] # # first.event <- which(sapply(event.prob,rbinom,n=1,size=1) > 0)[1] # if(!is.na(first.event)) event[seq(first.event, n.true)] <- 1 # # # Event may have occurred between visits, thus we should ONLY report info # # that occurred before the event + the event. # if(any(event==1)) { # obs.x2 <- unique(sort(c(obs.x,true.x[first.event]))) # Add first.event (if not already there) # obs.x <- obs.x2[obs.x2 <= true.x[first.event] ] # } # # outs <- data.frame(x=true.x, y=cd4.out, y.wn=cd4.out.wn, event, haart, XtoY, XtoH) # outs <- outs[match(obs.x,outs$x),] # outs$x <- outs$x - (outs$x[1]-1) # attr(outs,'ep') <- cbind(true.x,event,haart,event.prob) # outs # } # # # n <- 1000 # step <- 10 # nmon <- 120 # haart0.i <- rep(seq(0,700,10),n) # n.haart0 <- length(haart0.i) # ##### optimal 300 # event.arg.700 <- c(-0.007,1e-06,0,-3.1,0) # event.arg.500 <- c(-0.007,1e-06,0.03,-3.5,0) # event.arg.300 <- c(-0.007,1e-06,0.05,-4.1,0) # event.arg = event.arg.300 # # set.seed(1) # #evargs <- cbind(-0.007, 1e-06, seq(0,0.06,0.01), -3.1, 0) # #evargs <- cbind(-0.007, 1e-06, 0.08, seq(-5.6,-4.1,0.25), 0, 0) # surv.cp.n.300 <- rep(NA, n.haart0) # # for (i in seq(n.haart0)){ # cd4x <- cd4(mon = nmon, haart0 = haart0.i[i], sdratio = 0.1, # wnoise = TRUE, # pre = c(-5.8, 700), post = -0.05, # #pre = c(round(rnorm(1,mean=-5.8,sd=0.75),2),round(rnorm(1,mean=700,sd=50),0)), # #post = round(rnorm(1,mean=-0.05,sd=0.01),2), # event.arg = event.arg, # first1 = TRUE, haart0.true = TRUE, # haart.arg = c(1,-0.01,0), # freq = 1, sub = "all") # surv.cp.n.300[i] <- cumprod(1-attr(cd4x,'ep')[,'event.prob'])[nmon] # } # # event.arg = event.arg.500 # surv.cp.n.500 <- rep(NA, n.haart0) # for (i in seq(n.haart0)){ # cd4x <- cd4(mon = nmon, haart0 = haart0.i[i], sdratio = 0.1, # wnoise = TRUE, # pre = c(-5.8, 700), post = -0.05, # #pre = c(round(rnorm(1,mean=-5.8,sd=0.75),2),round(rnorm(1,mean=700,sd=50),0)), # #post = round(rnorm(1,mean=-0.05,sd=0.01),2), # event.arg = event.arg, # first1 = TRUE, haart0.true = TRUE, # haart.arg = c(1,-0.01,0), # freq = 1, sub = "all") # surv.cp.n.500[i] <- cumprod(1-attr(cd4x,'ep')[,'event.prob'])[nmon] # } # # event.arg = event.arg.700 # surv.cp.n.700 <- rep(NA, n.haart0) # for (i in seq(n.haart0)){ # cd4x <- cd4(mon = nmon, haart0 = haart0.i[i], sdratio = 0.1, # wnoise = TRUE, # pre = c(-5.8, 700), post = -0.05, # #pre = c(round(rnorm(1,mean=-5.8,sd=0.75),2),round(rnorm(1,mean=700,sd=50),0)), # #post = round(rnorm(1,mean=-0.05,sd=0.01),2), # event.arg = event.arg, # first1 = TRUE, haart0.true = TRUE, # haart.arg = c(1,-0.01,0), # freq = 1, sub = "all") # surv.cp.n.700[i] <- cumprod(1-attr(cd4x,'ep')[,'event.prob'])[nmon] # } # # # save.image("survival_curve.RData") ##### figue 1 load("survival_curve.RData") postscript("when2startFig6_ABC.eps", horizontal = FALSE, onefile = FALSE, paper = "special",height=10, width=8) layout(matrix(c(1, 1, 1, 2,3, 
4, 5, 6, 7), 3,3, byrow=TRUE)) par(mar=c(3.5,2.6,1.5,0.1)) par(mgp = c(1.75, 0.5, 0), oma=c(0.5,4,1.5,0.5)) #par(mar=c(3.5,2.5,0.5,0.1),mgp = c(1.75, 0.5, 0), oma=c(0.5,0.5,1,0.5)) # plot(unique(haart0.i),tapply(surv.cp.n.700, haart0.i,mean),type='l',ylim=c(0,1), # xlim=c(0, 715),main="", # xlab="CD4 Threshold",ylab='10-year Survival Probability', # cex.main=0.9, cex.lab=0.9, cex.axis=0.9) plot(unique(haart0.i),tapply(surv.cp.n.700, haart0.i,mean),type='l',ylim=c(0,1), xlim=c(0, 715),main="", xlab="CD4 Threshold",ylab='10-year Survival Probability', cex.main=1, cex.lab=1, cex.axis=1) abline(v=700, lty=2) mtext("A.", outer=TRUE, side=3,font=2 , at=0) lines(unique(haart0.i),tapply(surv.cp.n.500, haart0.i,mean),typ='l',ylim=c(0,1), xlim=c(0, 715),main='', col=2) abline(v=500, lty=2, col=2) lines(unique(haart0.i),tapply(surv.cp.n.300, haart0.i,mean),typ='l',ylim=c(0,1), xlim=c(715,0),main='', col=3) abline(v=350, lty=2, col=3) legend("bottomleft", legend=c("Optimal CD4 Threshold = 700", "Optimal CD4 Threshold = 500", "Optimal CD4 Threshold = 350"), col=1:3, lty=1, bty="n", cex=0.9) #par(mfrow=c(1,3), mar=c(5, 0.2, 4, 0.2)) #### Grabbing the 'truth' -- result based on a very large simulation load("when2start_add_n500000.Rdata") cole.est.summary1 <-cbind(apply(sapply(cole_result_350, FUN=function(x) log(x[,1])), 1, mean), apply(sapply(cole_result_500, FUN=function(x) log(x[,1])), 1, mean), apply(sapply(cole_result_700, FUN=function(x) log(x[,1])), 1, mean)) colnames(cole.est.summary1) <- c("optimal: 350", "optimal: 500", "optimal: 700") hernan.est.summary1 <-cbind(apply(sapply(hernan_result_350, FUN=function(x) log(x[,1])), 1, mean), apply(sapply(hernan_result_500, FUN=function(x) log(x[,1])), 1, mean), apply(sapply(hernan_result_700, FUN=function(x) log(x[,1])), 1, mean)) colnames(hernan.est.summary1) <- c("optimal: 350", "optimal: 500", "optimal: 700") load("when2start_add_sim500_new.RData") #load("when2start_add_sim500.Rdata") ####### cole_result_700[[143]]["501-600 vs. 601-700",] <- NA hernan_result_350[[1]]["350 vs. 450",] <- NA mode(hernan_result_350[[1]]) <- "numeric" hernan_result_350[[68]]["350 vs. 450",] <- NA mode(hernan_result_350[[68]]) <- "numeric" hernan_result_700[[342]]["500 vs. 600",] <- NA mode(hernan_result_700[[342]]) <- "numeric" hernan_result_700[[416]]["450 vs. 550",] <- NA mode(hernan_result_700[[416]]) <- "numeric" hernan_result_700[[436]]["500 vs. 
600",] <- NA mode(hernan_result_700[[436]]) <- "numeric" #### should average the point estimates on log HR scale and caculate the length of CI on log HR scale cole.est.summary <-cbind(apply(sapply(cole_result_350, FUN=function(x) log(x[,1])), 1, mean, na.rm=TRUE), apply(sapply(cole_result_500, FUN=function(x) log(x[,1])), 1, mean, na.rm=TRUE), apply(sapply(cole_result_700, FUN=function(x) log(x[,1])), 1, mean, na.rm=TRUE)) colnames(cole.est.summary) <- c("optimal: 350", "optimal: 500", "optimal: 700") cole.ci_length.summary <- cbind(apply(sapply(cole_result_350, FUN=function(x) log(x[,3])-log(x[,2])), 1, mean, na.rm=TRUE), apply(sapply(cole_result_500, FUN=function(x) log(x[,3])- log(x[,2])), 1, mean, na.rm=TRUE), apply(sapply(cole_result_700, FUN=function(x) log(x[,3])- log(x[,2])), 1, mean, na.rm=TRUE)) colnames(cole.ci_length.summary) <- c("optimal: 350", "optimal: 500", "optimal: 700") hernan.est.summary <-cbind(apply(sapply(hernan_result_350, FUN=function(x) log(x[,1])), 1, mean, na.rm=TRUE), apply(sapply(hernan_result_500, FUN=function(x) log(x[,1])), 1, mean, na.rm=TRUE), apply(sapply(hernan_result_700, FUN=function(x) log(x[,1])), 1, mean, na.rm=TRUE)) colnames(hernan.est.summary) <- c("optimal: 350", "optimal: 500", "optimal: 700") hernan.ci_length.summary <- cbind(apply(sapply(hernan_result_350, FUN=function(x) log(x[,3])- log(x[,2])), 1, mean, na.rm=TRUE), apply(sapply(hernan_result_500, FUN=function(x) log(x[,3])-log(x[,2])), 1, mean, na.rm=TRUE), apply(sapply(hernan_result_700, FUN=function(x) log(x[,3])-log(x[,2])), 1, mean, na.rm=TRUE)) colnames(hernan.ci_length.summary) <- c("optimal: 350", "optimal: 500", "optimal: 700") cole.est.summary <- cole.est.summary[11:1, 3:1] cole.ci_length.summary <- cole.ci_length.summary[11:1, 3:1] hernan.est.summary <- hernan.est.summary[11:1,3:1] hernan.ci_length.summary <- hernan.ci_length.summary[11:1,3:1] cd4 <- seq(100, 600, by=50) cole.est.summary1 <- cole.est.summary1[11:1, 3:1] hernan.est.summary1 <- hernan.est.summary1[11:1,3:1] plot(exp(cole.est.summary[,1]), cd4,log="x", type="n",axes=FALSE, xlab="Hazard Ratio", ylab="", xlim=c(0.3, 8), ylim=c(601, 99), main="optimal CD4 700") abline(v=1) axis(1, at=c(0.3,0.5, 1, 2, 4 ,8), labels=c( "0.3","0.5", "1", "2", "4", "8")) segments( exp(cole.est.summary[,1]-0.5*cole.ci_length.summary[,1]), cd4, exp(cole.est.summary[,1]+0.5*cole.ci_length.summary[,1]), cd4) points(exp(cole.est.summary[,1]), cd4, pch="|", cex=0.6) segments( exp(hernan.est.summary[,1]-0.5*hernan.ci_length.summary[,1]), cd4+10, exp(hernan.est.summary[,1]+0.5*hernan.ci_length.summary[,1]), cd4+10, col=4) points(exp(hernan.est.summary[,1]), cd4+10, pch="|", cex=0.6, col=4) axis(2, at=cd4-5, labels=unlist(lapply(cd4, FUN=function(x) paste(x,"vs.", x+100))), las=1, tck=0, col=0) #points(exp(cole.est.summary1[,1]), cd4, pch="|", col=2) #points(exp(hernan.est.summary1[,1]), cd4+10, pch="|", col=2) plot(exp(cole.est.summary[,2]), cd4,log="x", type="n",axes=FALSE, xlab="Hazard Ratio", ylab="", xlim=c(0.3, 8), ylim=c(601, 99), main="optimal CD4 500") abline(v=1) axis(1, at=c(0.3,0.5, 1, 2, 4 ,8), labels=c( "0.3","0.5", "1", "2", "4", "8")) segments( exp(cole.est.summary[,2]-0.5*cole.ci_length.summary[,2]), cd4, exp(cole.est.summary[,2]+0.5*cole.ci_length.summary[,2]), cd4) points(exp(cole.est.summary[,2]), cd4, pch="|", cex=0.6) segments( exp(hernan.est.summary[,2]-0.5*hernan.ci_length.summary[,2]), cd4+10, exp(hernan.est.summary[,2]+0.5*hernan.ci_length.summary[,2]), cd4+10, col=4) points(exp(hernan.est.summary[,2]), cd4+10, pch="|", 
cex=0.6, col=4)
#points(exp(cole.est.summary1[,2]), cd4, pch="|", col=2)
#points(exp(hernan.est.summary1[,2]), cd4+10, pch="|", col=2)

plot(exp(cole.est.summary[,3]), cd4, log="x", type="n", axes=FALSE, xlab="Hazard Ratio", ylab="",
     xlim=c(0.3, 8), ylim=c(601, 99), main="optimal CD4 350")
abline(v=1)
axis(1, at=c(0.3, 0.5, 1, 2, 4, 8), labels=c("0.3", "0.5", "1", "2", "4", "8"))
segments(exp(cole.est.summary[,3]-0.5*cole.ci_length.summary[,3]), cd4,
         exp(cole.est.summary[,3]+0.5*cole.ci_length.summary[,3]), cd4)
points(exp(cole.est.summary[,3]), cd4, pch="|", cex=0.6)
segments(exp(hernan.est.summary[,3]-0.5*hernan.ci_length.summary[,3]), cd4+10,
         exp(hernan.est.summary[,3]+0.5*hernan.ci_length.summary[,3]), cd4+10, col=4)
points(exp(hernan.est.summary[,3]), cd4+10, pch="|", cex=0.6, col=4)
mtext("B.", outer=TRUE, side=2, font=2, at=0.67, las=1)

##### FIGURE 6C: coverage of the 95% CIs (proportion of simulations whose CI contains the large-sample truth)
cole.cov <- matrix(NA, 11, 3)
for (i in 1:11){
  for (j in 1:3) {
    truth <- exp(cole.est.summary1[12-i, 4-j])
    cole.est.coverage <- cbind(apply(sapply(cole_result_350, FUN=function(x) ifelse(x[,2] < truth & x[,3] > truth, 1, 0)), 1, mean, na.rm=TRUE),
                               apply(sapply(cole_result_500, FUN=function(x) ifelse(x[,2] < truth & x[,3] > truth, 1, 0)), 1, mean, na.rm=TRUE),
                               apply(sapply(cole_result_700, FUN=function(x) ifelse(x[,2] < truth & x[,3] > truth, 1, 0)), 1, mean, na.rm=TRUE))
    cole.cov[i,j] <- cole.est.coverage[i,j]
  }
}
colnames(cole.cov) <- c("optimal: 350", "optimal: 500", "optimal: 700")
rownames(cole.cov) <- rownames(cole.est.coverage)[1:11]
cole.cov

hernan.cov <- matrix(NA, 11, 3)
for (i in 1:11){
  for (j in 1:3) {
    truth <- exp(hernan.est.summary1[12-i, 4-j])
    hernan.est.coverage <- cbind(apply(sapply(hernan_result_350, FUN=function(x) ifelse(x[,2] < truth & x[,3] > truth, 1, 0)), 1, mean, na.rm=TRUE),
                                 apply(sapply(hernan_result_500, FUN=function(x) ifelse(x[,2] < truth & x[,3] > truth, 1, 0)), 1, mean, na.rm=TRUE),
                                 apply(sapply(hernan_result_700, FUN=function(x) ifelse(x[,2] < truth & x[,3] > truth, 1, 0)), 1, mean, na.rm=TRUE))
    hernan.cov[i,j] <- hernan.est.coverage[i,j]
  }
}
colnames(hernan.cov) <- c("optimal: 350", "optimal: 500", "optimal: 700")
rownames(hernan.cov) <- rownames(hernan.est.coverage)[1:11]
hernan.cov

####### Estimated SE versus empirical SD
cole.emp.sd <- cbind(apply(sapply(cole_result_350, FUN=function(x) log(x[,1])), 1, sd, na.rm=TRUE),
                     apply(sapply(cole_result_500, FUN=function(x) log(x[,1])), 1, sd, na.rm=TRUE),
                     apply(sapply(cole_result_700, FUN=function(x) log(x[,1])), 1, sd, na.rm=TRUE))
colnames(cole.emp.sd) <- c("optimal: 350", "optimal: 500", "optimal: 700")
cole.mean.se <- cbind(apply(sapply(cole_result_350, FUN=function(x) (log(x[,3])-log(x[,2]))/(1.96*2)), 1, mean, na.rm=TRUE),
                      apply(sapply(cole_result_500, FUN=function(x) (log(x[,3])-log(x[,2]))/(1.96*2)), 1, mean, na.rm=TRUE),
                      apply(sapply(cole_result_700, FUN=function(x) (log(x[,3])-log(x[,2]))/(1.96*2)), 1, mean, na.rm=TRUE))
colnames(cole.mean.se) <- c("optimal: 350", "optimal: 500", "optimal: 700")
cbind(round(cole.mean.se,4), round(cole.emp.sd,4))[11:1,]
### mean SE is bigger than SD of estimates, especially for low CD4 comparisons
round(cbind(cole.mean.se[,3], cole.emp.sd[,3],
            cole.mean.se[,2], cole.emp.sd[,2],
            cole.mean.se[,1], cole.emp.sd[,1]), 4)[11:1,]

hernan.emp.sd <- cbind(apply(sapply(hernan_result_350, FUN=function(x) log(x[,1])), 1, sd, na.rm=TRUE),
                       apply(sapply(hernan_result_500, FUN=function(x) log(x[,1])), 1, sd, na.rm=TRUE),
                       apply(sapply(hernan_result_700, FUN=function(x) log(x[,1])), 1, sd, na.rm=TRUE))
colnames(hernan.emp.sd) <- c("optimal: 350", "optimal: 500", "optimal: 700")
hernan.mean.se <- cbind(apply(sapply(hernan_result_350, FUN=function(x) (log(x[,3])-log(x[,2]))/(1.96*2)), 1,
mean, na.rm=TRUE), apply(sapply(hernan_result_500, FUN=function(x) (log(x[,3])- log(x[,2]))/(1.96*2)), 1, mean, na.rm=TRUE), apply(sapply(hernan_result_700, FUN=function(x) (log(x[,3])- log(x[,2]))/(1.96*2)), 1, mean, na.rm=TRUE)) colnames(hernan.mean.se) <- c("optimal: 350", "optimal: 500", "optimal: 700") cbind(hernan.mean.se,hernan.emp.sd) round(cbind(hernan.mean.se[,3],hernan.emp.sd[,3], hernan.mean.se[,2],hernan.emp.sd[,2], hernan.mean.se[,1],hernan.emp.sd[,1]),4)[11:1,] ### Now comparing Hernan and Cole estimates within each replication iteration hr.ratio.350<-matrix(NA,500,11) for (i in 1:500) { for (j in 1:11) { hr.ratio.350[i,j]<-hernan_result_350[[i]][j,1]/cole_result_350[[i]][j,1] } } hr.ratio.350.sd<-apply(hr.ratio.350,2, FUN=function(x) sd(log(x),na.rm=TRUE))[11:1] hr.ratio.350.mean<-apply(hr.ratio.350,2, FUN=function(x) mean(log(x),na.rm=TRUE))[11:1] hr.ratio.500<-matrix(NA,500,11) for (i in 1:500) { for (j in 1:11) { hr.ratio.500[i,j]<-hernan_result_500[[i]][j,1]/cole_result_500[[i]][j,1] } } hr.ratio.500.sd<-apply(hr.ratio.500,2, FUN=function(x) sd(log(x),na.rm=TRUE))[11:1] hr.ratio.500.mean<-apply(hr.ratio.500,2, FUN=function(x) mean(log(x),na.rm=TRUE))[11:1] hr.ratio.700<-matrix(NA,500,11) for (i in 1:500) { for (j in 1:11) { hr.ratio.700[i,j]<-hernan_result_700[[i]][j,1]/cole_result_700[[i]][j,1] } } hr.ratio.700.sd<-apply(hr.ratio.700,2, FUN=function(x) sd(log(x),na.rm=TRUE))[11:1] hr.ratio.700.mean<-apply(hr.ratio.700,2, FUN=function(x) mean(log(x),na.rm=TRUE))[11:1] plot(exp(hr.ratio.700.mean), cd4,log="x", type="n",axes=FALSE, xlab="Ratio of Hazard Ratios", ylab="", xlim=c(0.15, 6), ylim=c(601, 99), main="optimal CD4 700") abline(v=1) axis(1, at=c(0.2,0.5, 1, 2, 4), labels=c( "0.2","0.5", "1", "2", "4")) axis(2, at=cd4-5, labels=unlist(lapply(cd4, FUN=function(x) paste(x,"vs.", x+100))), las=1, tck=0, col=0) segments( exp(hr.ratio.700.mean-1.96*hr.ratio.700.sd), cd4, exp(hr.ratio.700.mean+1.96*hr.ratio.700.sd), cd4) points(exp(hr.ratio.700.mean), cd4, pch="|", cex=0.6) plot(exp(hr.ratio.500.mean), cd4,log="x", type="n",axes=FALSE, xlab="Ratio of Hazard Ratios", ylab="", xlim=c(0.15, 6), ylim=c(601, 99), main="optimal CD4 500") abline(v=1) axis(1, at=c(0.2,0.5, 1, 2, 4), labels=c( "0.2","0.5", "1", "2", "4")) segments( exp(hr.ratio.500.mean-1.96*hr.ratio.500.sd), cd4, exp(hr.ratio.500.mean+1.96*hr.ratio.500.sd), cd4) points(exp(hr.ratio.500.mean), cd4, pch="|", cex=0.6) plot(exp(hr.ratio.350.mean), cd4,log="x", type="n",axes=FALSE, xlab="Ratio of Hazard Ratios", ylab="", xlim=c(0.15, 6), ylim=c(601, 99), main="optimal CD4 350") abline(v=1) axis(1, at=c(0.2,0.5, 1, 2, 4), labels=c( "0.2","0.5", "1", "2", "4")) segments( exp(hr.ratio.350.mean-1.96*hr.ratio.350.sd), cd4, exp(hr.ratio.350.mean+1.96*hr.ratio.350.sd), cd4) points(exp(hr.ratio.350.mean), cd4, pch="|", cex=0.6) mtext("C.",outer=TRUE, side=2,font=2 , at=0.33, las=1) dev.off()