#CCASAnet data #ART Heterogeneity Analysis #Giganti #Last edited: 01DEC2013 ############################################### ### Phase 1: Dataset prep ############################################### setwd("~/Documents/CCASAnet/Data/") #Import data basic<-read.csv("basic.csv",header=T) art<-read.csv("art.csv",header=T) follow<-read.csv("follow.csv",header=T) visit<-read.csv("visit.csv",header=T) lab_cd4<-read.csv("lab_cd4.csv",header=T) lab_rna<-read.csv("lab_rna.csv",header=T) #Site ID and Patient ID are two seperate variables #Create a single patient identifier that combines site and patient ID #Note: formatC is there to add leading zeros so IDs stay numeric! SitePatientID<-function(df){ df$site.patient<-paste(df$site,"-",formatC(df$patient, width = 6, format = "d", flag = "0")) } #Create unique patient ID for each data table basic$site.patient<-SitePatientID(basic) art$site.patient<-SitePatientID(art) follow$site.patient<-SitePatientID(follow) lab_cd4$site.patient<-SitePatientID(lab_cd4) visit$site.patient<-SitePatientID(visit) #Date reformatting basic$birth_dt<-as.Date(basic$birth_d) #Reformat other dates from different data tables art$startdate<-as.Date(art$art_sd) #basic$birth_dt<-as.Date(basic$birth_d) basic$baseline_dt<-as.Date(basic$baseline_d) follow$lastalive_dt<-as.Date(follow$l_alive_d) lab_cd4$cd4_dt<-as.Date(lab_cd4$cd4_d) visit$visit_dt<-as.Date(visit$visit_d) #Workaround to create date variable #Problem because first observation is "" temp_aids_dt<-ifelse(basic$aids_d=="","1900-01-01",as.character(basic$aids_d)) temp_aids_dt<-as.Date(temp_aids_dt) basic$aids_dt<-ifelse(temp_aids_dt=="1900-01-01",NA,temp_aids_dt) temp_aids_dt<-NULL temp_death_dt<-ifelse(follow$death_d=="","1900-01-01",as.character(follow$death_d)) temp_death_dt<-as.Date(temp_death_dt) follow$death_dt<-ifelse(temp_death_dt=="1900-01-01",NA,temp_death_dt) temp_death_dt<-NULL ############### #Follow Dataset ############### #Creating analysis variables #Censor date follow$death_dt<-ifelse(follow$death_dt=="1900-01-01",NA, follow$death_dt) follow$censor_dt<-ifelse(follow$death_y==1 & is.na(follow$death_dt)==F, follow$death_dt,follow$lastalive_dt) follow$censor_dt<-as.Date(follow$censor_dt,origin="1970-01-01") #Event - yes=1, no=0 #Use death_y variable #Create truncated dataset to merge later follow.short<-follow[,c("site.patient","death_y","censor_dt")] #Goal: Find ART start date #Strategy: Restrict dataset to just first ART observation art_temp<-do.call("rbind", by(art, INDICES=art$site.patient, FUN=function(art) art[which.min(art$startdate), ])) Artstartdate<-art_temp[,c("site.patient","startdate","regimen_class")] names(Artstartdate)<-c("site.patient","FirstARTdate","ARVclass") #Simplify ARV class variable: NNRTI,Boosted PI, nonHAART,other Artstartdate$ARVclass<-toupper(Artstartdate$ARVclass) Artstartdate$ARVclass_orig<-Artstartdate$ARVclass Artstartdate$ARVclass<-with(Artstartdate, as.factor(ifelse(ARVclass=="NNRTI","NNRTI", ifelse(ARVclass=="BOOSTED PI","BOOSTED PI", ifelse(ARVclass=="3 NRTI","OTHER", ifelse(ARVclass=="UNBOOSTED PI","OTHER", ifelse(ARVclass=="PI","OTHER", ifelse(ARVclass=="OTHER","OTHER", ifelse(ARVclass=="NONHAART","nonHAART", ifelse(ARVclass=="NON HAART","nonHAART", NA)))))))))) #Check to confirm regimen class is appropriate art_temp[is.na(Artstartdate$ARVclass),c("site.patient","art_id")] ########################### #Check for first CD4 count ############################ lab_temp<-merge(lab_cd4,Artstartdate,by="site.patient") #Time since initiation #Restrict to those labs within baseline CD4 window -180 - +7 days within initiation lab_temp$ART1lag<-lab_temp$cd4_dt-lab_temp$FirstARTdate lab_temp2<-lab_temp[lab_temp$ART1lag<=7 & lab_temp$ART1lag>-181,] #How to choose 'best lab (per 11/11 convo with BS: #1 - Look for lab closest to start date BEFORE initiation #2 - If no lab within 180 days before initiation, take first lab within 7 days #Create temp variable to make this happen #Take abs value of all variables, add 180 days to labs after initiation #Grab the minimum lab_temp2$cd4window_temp<-abs(lab_temp2$ART1lag)+((lab_temp2$ART1lag)>0)*180 lab_temp3<-do.call("rbind", by(lab_temp2, INDICES=lab_temp2$site.patient, FUN=function(lab_temp2) lab_temp2[which.min(lab_temp2$cd4window_temp), ])) #Create condensed CD4 dataset for analysis dataset merging BaselineCD4data<-lab_temp3[,c("site.patient","cd4_v","cd4_dt","ART1lag")] names(BaselineCD4data)<-c("site.patient","BaselineCD4","BaselineCD4_dt","BaselineCD4_lag") #Nadir CD4 #Does not place lower bound on CD4 date lab_temp_t<-lab_temp[lab_temp$ART1lag<=7 & is.na(lab_temp$cd4_v)==F,] lab_temp_tt<-do.call("rbind", by(lab_temp_t, INDICES=lab_temp_t$site.patient, FUN=function(lab_temp_t) lab_temp_t[which.min(lab_temp_t$cd4_v), ])) nadirCD4data<-lab_temp_tt[,c("site.patient","cd4_v","cd4_dt","ART1lag")] names(nadirCD4data)<-c("site.patient","nadirCD4","nadirCD4_dt","nadirCD4_lag") ############################ #AIDS at enrollment status ############################ #replace . with "" basic$whostage<-ifelse(basic$whostage=="." |basic$whostage=="9" ,"",as.character(basic$whostage)) #Function to combine whostage and cdc staging info assign.clincalstage<-function(table){ clinical.stage<-with(table, ifelse(whostage==""&cdcstage=="","", ifelse(whostage==""&cdcstage!="",as.character(cdcstage), ifelse(whostage!=''&cdcstage=="",paste("WHO ",as.character(whostage),sep=""), paste(whostage,cdcstage,sep=";"))))) return(clinical.stage) } #Function to classify staging as AIDS/not AIDS classify.stage<-function(clinical.stage) { ifelse(is.na(clinical.stage)| clinical.stage=="","Unknown", ifelse(clinical.stage=="A or B"| clinical.stage=="A"| clinical.stage=="A1"| clinical.stage=="A2"| clinical.stage=="A3"| clinical.stage=="B"| clinical.stage=="B1"| clinical.stage=="B2"| clinical.stage=="B3"| clinical.stage=="WHO 1"| clinical.stage=="WHO 2"| clinical.stage=="WHO 3"| clinical.stage=="WHO 2 or 3","not AIDS", ifelse(clinical.stage=="C"| clinical.stage=="C1"| clinical.stage=="C2"| clinical.stage=="C3"| clinical.stage=="WHO 4","AIDS","SOMETHING ELSE"))) } #Run combination and classification of clinical staging for basic & visit tables basic$clinical.stage<-assign.clincalstage(basic) visit$clinical.stage<-assign.clincalstage(visit) basic$clinical.stage.cat<-classify.stage(basic$clinical.stage) visit$clinical.stage.cat<-classify.stage(visit$clinical.stage) visit.uniquepatients<-unique(visit[,c("site.patient","site")]) visit_temp<-merge(visit[,c("site.patient","visit_dt","clinical.stage.cat")], Artstartdate[,c("site.patient","FirstARTdate")],by="site.patient") visit_temp$indicator<-1 visit_temp$daysfromART<-visit_temp$visit_dt-visit_temp$FirstARTdate #We are interested in generating a variable that classifies a patient's AIDS status prior to treatment initiation. There are five primary variables in the database for assessing a patient's status. The 'aids_y' variable in the basic table, the 'whostage' variable in the basic table, the 'cdcstage' variable in the basic table, the 'whostage' variable in the visit table and the 'cdcstage variable in the visit table. We want to create a composite variable using all of these sources. #To generate such a composite variable, I divide the work into three subtasks: (1) ADE using clinical data in visit table, (2) ADE using clinical data in basic table, (3) ADE using reported status in basic table. #For every patient interaction in the visit table, we can classify a patient as AIDS on not AIDS using the whostage and cdcstage variables. Next, we need to limit our interactions to just those which occurred prior to patient treatment initiation. Thus, we exclude all observations which occurred more than 30 days AFTER the patient's initial treatment date. Using this subsetted dataset, I next look for unique patients who had at least one visit with a ADE. Create a variable that contains a list of these patients. Next, I subset the dataset to visits that occurred no more than 365 days BEFORE the patient's treatment initiation and no more than 30 days AFTER treatment initiation. In this subset, I look for patients with at least one confirmed not AIDS visit and create a variable that contains a list of these patients. I combine these two lists -- if a patient is in both groups, I override with the AIDS classification. It is important to generate the not AIDS list because we want to differentiate between patients with not AIDS and thus with unknown status. This provides ADE using clinical data in the visit table (1). To create the composite variable, I look at all three subvariables simultaneously. If either of (1), (2), or (3) has a AIDS status, the patient's preART status is defined as "AIDS". If non has a AIDS status and at least one has a not AIDS status, they are defined as "not AIDS". Finally, if a patient's status is not clear in any of these three sources, they are classified as "Unknown". #The window between a patient's last visit with an assessment of AIDS and their treatment intiation varies by patient #A patient's AIDS status is dynamic. Currently, we are classifying a patient as non-AIDS if their last visit before treatment was non-AIDS, regardless of the length of the window between the last visit date and treatment start date #The variable - PreART.window tries to limit how recent that visit is, to distinguish between unknown and non-AIDS. Post ART window is the window following the ART initiation date that we use to assess status at time of initiation. PreART.window<-365 PostART.window<-30 #Step 1: Find unique patients with any clinical documentation of AIDS before ART visit_clinicalaids<-unique( with(visit_temp, visit_temp[which(daysfromART<=PostART.window & clinical.stage.cat=="AIDS"), c("site.patient","indicator")])) colnames(visit_clinicalaids)<-c("site.patient","AIDS") #Step 2: Try to find patients with non-AIDS status at time of ART initiation #Want to identify all cases where patient has documentation of 'not-AIDS' between the pre-ART window and post-ART window visit_clinicalnotaids<-unique( with(visit_temp, visit_temp[which(daysfromART>=-PreART.window & daysfromART<=PostART.window & clinical.stage.cat=="not AIDS"),c("site.patient","indicator")])) colnames(visit_clinicalnotaids)<-c("site.patient","notAIDS") #Step 3: Try to find patients with no visits preART and their first visit after ART suggests AIDS #Find first visit #Note: For now these patients are classified as unknown and this step is unnecessary #Note: In future, may consider reclassifying these patients as nonAIDS visit_first<-do.call("rbind", by(visit_temp, INDICES=visit_temp$site.patient, FUN=function(visit_temp) visit_temp[which.min(visit_temp$daysfromART), ])) #Limit to patients whose first visit is documented non-AIDS visit_clinicalnotaids_post<-with(visit_first, visit_first[visit_dt>FirstARTdate+PostART.window & clinical.stage.cat=="not AIDS", c("site.patient","indicator")]) colnames(visit_clinicalnotaids_post)<-c("site.patient","notAIDSpost") #Create dataframe that assesses AIDS status pre-ART using visit data #Merge 3 steps above AIDSstatus_temp4<-merge(basic[,c("site.patient","site","baseline_dt","aids_dt","aids_y","clinical.stage.cat")], visit.uniquepatients,by="site.patient",all.x=TRUE) AIDSstatus_temp3<-merge(AIDSstatus_temp4,Artstartdate[,c("site.patient","FirstARTdate")],by="site.patient",all.x=TRUE) AIDSstatus_temp2<-merge(AIDSstatus_temp3,visit_clinicalaids[,c("site.patient","AIDS")],by="site.patient",all.x=TRUE) AIDSstatus_temp1<-merge(AIDSstatus_temp2,visit_clinicalnotaids[,c("site.patient","notAIDS")],by="site.patient",all.x=TRUE) AIDSstatus<-merge(AIDSstatus_temp1,visit_clinicalnotaids_post[,c("site.patient","notAIDSpost")],by="site.patient",all.x=TRUE) AIDSstatus$AIDSstatus.visit<-with(AIDSstatus, ifelse(AIDS==1 & is.na(AIDS)==FALSE,"AIDS", ifelse(notAIDS==1 & is.na(notAIDS)==FALSE,"not AIDS", ifelse(notAIDSpost==1 & is.na(notAIDSpost)==FALSE, "Unknown", "Unknown")))) #Assign patients with no visits as unknown AIDS status for the 'visit' variable #Classify AIDS status at treatment initation using aids_y variable AIDSstatus$AIDSstatus.reported<- with(AIDSstatus, ifelse(is.na(baseline_dt)=="TRUE", "No enroll data", ifelse(is.na(FirstARTdate)=="TRUE", "Not ART", ifelse(is.na(aids_y)=="TRUE" | aids_y==9, "Unknown", ifelse(aids_y==1 & is.na(aids_dt)=="FALSE" & aids_dt <= (FirstARTdate+30), "AIDS", ifelse(aids_y==1 & is.na(aids_dt)=="FALSE" & aids_dt > (FirstARTdate+30), "Unknown", ifelse(aids_y==1 & is.na(aids_dt)=="TRUE" & baseline_dt <= (FirstARTdate+30), "AIDS", ifelse(aids_y==1 & is.na(aids_dt)=="TRUE" & baseline_dt> (FirstARTdate+30), "Unknown", ifelse(aids_y==1, "Flag", ifelse(aids_y==0 & baseline_dt<=(FirstARTdate+30) & baseline_dt>=(FirstARTdate-PreART.window), "not AIDS", ifelse(aids_y==0 & baseline_dt<=(FirstARTdate+30) & baseline_dt<(FirstARTdate-PreART.window), "Unknown", ifelse(aids_y==0 & baseline_dt>(FirstARTdate-30), "Unknown", "Unknown")))))))))))) #Classify AIDS status at treatment initation using who + cdc staging variables in basic AIDSstatus$AIDSstatus.enroll<- with(AIDSstatus, ifelse(is.na(baseline_dt)=="TRUE", "No enroll data", ifelse(is.na(FirstARTdate)=="TRUE", "Not ART", ifelse(is.na(clinical.stage.cat)=="TRUE" | clinical.stage.cat=="Unknown", "Unknown", ifelse(clinical.stage.cat=="AIDS" & baseline_dt <= (FirstARTdate+30), "AIDS", ifelse(clinical.stage.cat=="AIDS" & baseline_dt> (FirstARTdate+30), "Unknown", ifelse(clinical.stage.cat=="AIDS", "Flag", ifelse(clinical.stage.cat=="not AIDS" & baseline_dt<=(FirstARTdate+30) & baseline_dt>=(FirstARTdate-PreART.window), "not AIDS", ifelse(clinical.stage.cat=="not AIDS" & baseline_dt<=(FirstARTdate+30) & baseline_dt<(FirstARTdate-PreART.window), "Unknown", ifelse(clinical.stage.cat=="not AIDS" & baseline_dt>(FirstARTdate-30), "Unknown", "Unknown")))))))))) #Will use data from visit table as primary source #Replace unknowns with data from aids_y variable #Replace remaining unknowns with data from clinical status variable in basic #Step 1 AIDSstatus$preART.aids<- with(AIDSstatus, ifelse(AIDSstatus.visit=="Unknown", AIDSstatus.reported, AIDSstatus.visit)) #Step 2 AIDSstatus$preART.aids<- with(AIDSstatus, ifelse(preART.aids=="Unknown", AIDSstatus.enroll, preART.aids)) #Replace non-AIDS with AIDS if AIDS for any visit AIDSstatus$preART.aids<-with(AIDSstatus, ifelse(preART.aids=="not AIDS" & (AIDSstatus.reported=="AIDS" | AIDSstatus.enroll=="AIDS"), "AIDS",preART.aids)) ############################################################################### ############################################################################### #Begin process of merging separate datasets together to create analysis dataset #R only allows merging one at a time ############################################################################### ############################################################################### temp2<-merge(basic,Artstartdate,by="site.patient",all=T) temp3<-merge(temp2,BaselineCD4data,by="site.patient",all=T) temp4<-merge(temp3,nadirCD4data,by="site.patient",all=T) temp5<-merge(temp4,follow.short,by="site.patient",all=T) temp6<-merge(temp5,AIDSstatus[,c("AIDSstatus.enroll","preART.aids","site.patient")], by="site.patient",all=T) Preliminary<-temp6 #Create analysis variables #age at ART initiation Preliminary$age<-as.numeric(floor((Preliminary$FirstARTdate-Preliminary$birth_dt)/365.25)) Preliminary$age50<-as.numeric(Preliminary$age>50) Preliminary$aids.initiation<-ifelse(Preliminary$preART.aids=="AIDS",1,ifelse(Preliminary$preART.aids=="not AIDS",0,NA)) Preliminary$followup.time<-as.numeric(Preliminary$censor_dt-Preliminary$FirstARTdate) Preliminary$Initiation.Year<-as.numeric(format(Preliminary$FirstARTdate, "%Y")) Preliminary$modetransmission<-as.factor(with(Preliminary,ifelse(mode=="Heterosexual contact", "Heterosexual", ifelse(mode=="Homosexual contact"|mode=="Bisexual","Homosexual/Bisexual", ifelse(mode=="Injecting drug user", "IDU", ifelse(mode=="Homo/Bisexual and Injecting drug user", "IDU", ifelse(mode=="Heterosexual contact and Injecting drug user","IDU", ifelse(mode=="Transfusion, nonhemophilia related","Other", ifelse(mode=="Perinatal" |mode=="Hemophiliac", "Other", ifelse(mode=="Other (specify in mode_oth)","Other", ifelse(mode=="Unknown","Unknown", as.character(mode)))))))))))) Preliminary$recart_y1<-ifelse(Preliminary$recart_y==9,1,Preliminary$recart_y) #Various exclusions Preliminary$Exclusion<- with(Preliminary, ifelse(is.na(FirstARTdate),"(-1): No ART data", ifelse(site=="chile?","(0): Chile", ifelse(is.na(age)==T |Preliminary$birth_dt=="1900-01-01", "(2a): no age info", ifelse(age<18 , "(2b): Age < 18", ifelse(recart_y1==1,"(3): Unconfirmed ART naive", ifelse(ARVclass=="FLAG","(1): not 1st line ART", ifelse(ARVclass=="nonHAART", "(1a:) First regimen not HAART", ifelse(followup.time<0 | is.na(followup.time)==T, "(99): Invalid follow-up time", ifelse(FirstARTdate0),] #Create list of study sites sitelist<-sort(unique(Analysis$site)) #Create deidentified site list sitelist2<-c("Site 1","Site 2","Site 3","Site 4","Site 5","Site 6","Site 7") #These variables have skewed distributions Analysis$sqrt.nadirCD4<-sqrt(Analysis$nadirCD4) Analysis$log.followup.time<-log(Analysis$followup.time) Analysis$time.event.interaction<-Analysis$log.followup.time*Analysis$death_y attach(Analysis) dd<-datadist(age,nadirCD4,male,Initiation.Year,ARVclass,aids.initiation,site) options(datadist='dd') d.analysis<-data.frame(age,sqrt.nadirCD4,male,Initiation.Year,ARVclass,aids.initiation,site,log.followup.time,death_y,time.event.interaction) #Perform multiple imputation library(mi) M<-10 IMP<-mi(d.analysis, n.imp=M,seed=20130912) # The actual imputation ############################################# ### Functions that are used through code ############################################# library(survival) library(coxme) library(rms) ##################################### #Function to perform meta analysis (used for approach #8) ##################################### #a: Function to run separate basic analyses by site CoxPH.country<-function(dataset,location){ dataset.country<-with(dataset,dataset[site==location,]) censoring.dat<-with(dataset.country, Surv(exp(log.followup.time),death_y)) coxph(censoring.dat~age +rcs(sqrt.nadirCD4,4) + male+rcs(Initiation.Year,4)+ARVclass +aids.initiation,data=dataset.country) } #b: Function to get composite effect estimate via meta analysis library(meta) library(rmeta) MetaVar<-function(dataset,variable,modified){ sitelist<-sort(unique(dataset$site)) logHR<-selogHR<-rep(NA,length(sitelist)) for (k in 1:length(sitelist)){ output<-CoxPH.country(dataset,sitelist[k]) modelvars<-names(output$coef) r<-data.frame(modelvars,1:length(modelvars)) varnumber<-r[r$modelvars==variable,2] #Extract log HR and SElogHR for each site logHR[k]<-output$coef[variable] selogHR[k]<-sqrt(diag(as.matrix(output$var[varnumber,varnumber]))) } metaresults<-metagen(logHR, selogHR, sm="HR") metaresults } ##################################### #Function to calculate instantaneous hazard ##################################### InstantaneousHazard<-function(modelno) { cumhaz<-basehaz(modelno,centered=FALSE) #Take the cumulaltive hazard and create variable for instantaneous hazard cumhaz$delta<-rep(NA,dim(cumhaz)[1]) #Loop 1: Calculate delta hazards for (i in 2:dim(cumhaz)[1]){ cumhaz$delta[i]<-(cumhaz$hazard[i]-cumhaz$hazard[i-1]) #basehaz() stacks baseline hazards for each strata in matrix #When use strata() term in coxph, need following command to appropriately calculate delta hazards if (dim(cumhaz)[2]==4) if (cumhaz$strata[i]!= cumhaz$strata[i-1]) cumhaz$delta[i]<-NA } #Remove all time points with no change in hazard cumhaz2<-cumhaz[cumhaz$delta!=0 & is.na(cumhaz$delta)==F,] cumhaz2$instantaneoush<-rep(NA,dim(cumhaz2)[1]) cumhaz2$instantaneoush[1]<-cumhaz2$delta[1]/cumhaz2$time[1] #Loop 2: Calculate instantaneous hazard for (j in 2:dim(cumhaz2)[1]){ cumhaz2$instantaneoush[j]<-cumhaz2$delta[j]/(cumhaz2$time[j]-cumhaz2$time[j-1]) #See above note re: strata() if (dim(cumhaz2)[2]==5) if (cumhaz2$strata[j]!= cumhaz2$strata[j-1]) cumhaz2$instantaneoush[j]<-cumhaz2$delta[j]/cumhaz2$time[j] } #Change units from per day to per year cumhaz2$instantaneoush<-cumhaz2$instantaneoush*365.25 cumhaz2$time<-cumhaz2$time/365.25 return(cumhaz2) } #Similar function to calculate baseline hazards for the random effects models from coxme() InstantaneousHazard2 <- function(coxfit) { #calculate Aalen's estimate of cum hazard event <- coxfit$y[,"status"] == 1 time <- coxfit$y[,"time"] risk <- exp(coxfit$linear.predictor) dt <- unique(time[event]) k <- length(dt) delta <- rep(0,k) for(i in 1:k) { delta[i] <- sum(event[time==dt[i]])/sum(risk[time >= dt[i]]) } temp<-data.frame(time=dt[order(dt)], delta=delta[order(dt)]) #Loop 2: Calculate hazard and instantaneous hazard instantaneoush<-hazard<-NULL hazard[1]<-temp$delta[1] for (j in 2:dim(temp)[1]){ instantaneoush[j]<-temp$delta[j]/(temp$time[j]-temp$time[j-1]) hazard[j]<-hazard[j-1]+temp$delta[j] } cumhaz3<-cbind(hazard,temp, instantaneoush)[-1,] #Change units from per day to per year cumhaz3$instantaneoush<-cumhaz3$instantaneoush*365.25 cumhaz3$time<-cumhaz3$time/365.25 return(cumhaz3) } #################################################################### ######Figure 1 for manuscript ##### Plotting baseline hazards when use separate models for each site ####################################################################### library(survival) Figure1ClusterLine<-function(sitelab,color,linetype=1,linewidth=1){ mod<-coxph(Surv(followup.time,death_y)~1,data=subset(Analysis,site==sitelab)) instanthaz<-InstantaneousHazard(mod) lines(lowess(instanthaz[,2],log(instanthaz[,4])),col=color,lty=1,lwd=1) } pdf(file="Figures/BaselineHazardSite.pdf") #postscript(file="Figures/BaselineHazardSite.eps") par(mfrow=c(1,1),mar=c(5,5,3.5,1.5)) plot(c(0,8),c(-7,0),type="n",xlab="Years from start of HAART",ylab="Estimated hazard of death (in years)",,axes=F,cex.lab=1.8) axis(1,cex.axis=1.6) axis(2,at=log(c(1,.1,.01,.001)),label=c(1,.1,.01,.001),cex.axis=1.6) Figure1ClusterLine("argentina",2) Figure1ClusterLine("brazil",3) Figure1ClusterLine("chile",4) Figure1ClusterLine("haiti",5) Figure1ClusterLine("honduras",6) Figure1ClusterLine("mexico",7) Figure1ClusterLine("peru",8) legend(5,-.1,as.factor(capitalize(as.character(sitelist))),col=c(2:8),lty=c(1,1,1,1,1,1,1),lwd=c(3,2,1,2,1.5,1.5,3)) box() dev.off() #################################################################### ######Figure 2 for manuscript #Single panel for six different approaches (marginal, fixed cluster, random cluster, random cluster + trt, stratified, meta) ####################################################################### library(survival) library(coxme) pdf(file = paste("Figures/BaselineHazardPanel(ADE).pdf",sep="")) par(mfrow=c(3,2),mar=c(0,0,0,0),oma=c(6,6,3,3)) ####################################################################### #Fig2A #Plot instantaneous hazard by var using an unadjusted model ####################################################################### #Naive model.2A<-coxph(Surv(followup.time,death_y)~aids.initiation,data=Analysis) model.2A.coef<-model.2A$coef model.2A.se<-sqrt(model.2A$var[1,1]) #Robust model.2AA<-coxph(Surv(followup.time,death_y)~aids.initiation + cluster(site),data=Analysis) model.2AA.coef<-model.2AA$coef model.2AA.se<-sqrt(model.2AA$var[1,1]) instanthaz.2A<-InstantaneousHazard(model.2A) plot(c(0,8),c(-7,1),type="n",xlab="",ylab="",,axes=F,cex.lab=1.8) text(-0.4,0.9,"(A): Marginal approach",pos=4) text(-0.4,0.4,paste("Naive HR: ", sprintf("%.2f",exp(model.2A.coef),2), " (",sprintf("%.2f",exp(model.2A.coef - 1.96*model.2A.se),2)," - ",sprintf("%.2f",exp(model.2A.coef + 1.96*model.2A.se),2),")",sep="") ,pos=4) text(-0.4,-0.1,paste("Robust HR: ", sprintf("%.2f",exp(model.2AA.coef),2), " (",sprintf("%.2f", exp(model.2AA.coef - 1.96*model.2AA.se),2)," - ",sprintf("%.2f", exp(model.2AA.coef + 1.96*model.2AA.se),2),")",sep="") ,pos=4) lines(lowess(instanthaz.2A[,2],log(instanthaz.2A[,4])),col=1) lines(lowess(instanthaz.2A[,2],log(instanthaz.2A[,4]*exp(model.2A$coef[1]))),col=1,lty=2) axis(2,at=log(c(1,.1,.01,.001)),label=c(1,.1,.01,.001),cex.axis=1.6) box() ####################################################################### #Fig2B #Plot instantaneous hazard by var using a fixed effects model ####################################################################### model.2B<-coxph(Surv(followup.time,death_y)~aids.initiation +(site),data=Analysis) model.2B.coef<-model.2B$coef[1] model.2B.se<-sqrt(model.2B$var[1,1]) instanthaz.2B<-InstantaneousHazard(model.2B) plot(c(0,8),c(-7,1),type="n",xlab="",ylab="",,axes=F,cex.lab=1.8) text(-0.4,0.9,"(B): Fixed cluster effects approach",pos=4) text(-0.4,0.4,paste("HR: ", sprintf("%.2f",exp(model.2B.coef),2), " (",sprintf("%.2f",exp(model.2B.coef - 1.96*model.2B.se),2)," - ",sprintf("%.2f",exp(model.2B.coef + 1.96*model.2B.se),2),")",sep="") ,pos=4) lines(lowess(instanthaz.2B[,2],log(instanthaz.2B[,4])),col=2) lines(lowess(instanthaz.2B[,2],log(instanthaz.2B[,4]*exp(model.2B$coef[2]))),col=3) lines(lowess(instanthaz.2B[,2],log(instanthaz.2B[,4]*exp(model.2B$coef[3]))),col=4) lines(lowess(instanthaz.2B[,2],log(instanthaz.2B[,4]*exp(model.2B$coef[4]))),col=5) lines(lowess(instanthaz.2B[,2],log(instanthaz.2B[,4]*exp(model.2B$coef[5]))),col=6) lines(lowess(instanthaz.2B[,2],log(instanthaz.2B[,4]*exp(model.2B$coef[6]))),col=7) lines(lowess(instanthaz.2B[,2],log(instanthaz.2B[,4]*exp(model.2B$coef[7]))),col=8) lines(lowess(instanthaz.2B[,2],log(instanthaz.2B[,4]*exp(model.2B$coef[1]))),col=2,lty=2) lines(lowess(instanthaz.2B[,2],log(instanthaz.2B[,4]*exp(model.2B$coef[1]+ model.2B$coef[2]))),col=3,lty=2) lines(lowess(instanthaz.2B[,2],log(instanthaz.2B[,4]*exp(model.2B$coef[1]+ model.2B$coef[3]))),col=4,lty=2) lines(lowess(instanthaz.2B[,2],log(instanthaz.2B[,4]*exp(model.2B$coef[1]+ model.2B$coef[4]))),col=5,lty=2) lines(lowess(instanthaz.2B[,2],log(instanthaz.2B[,4]*exp(model.2B$coef[1]+ model.2B$coef[5]))),col=6,lty=2) lines(lowess(instanthaz.2B[,2],log(instanthaz.2B[,4]*exp(model.2B$coef[1]+ model.2B$coef[6]))),col=7,lty=2) lines(lowess(instanthaz.2B[,2],log(instanthaz.2B[,4]*exp(model.2B$coef[1]+ model.2B$coef[7]))),col=8,lty=2) legend(5,1.4,as.factor(capitalize(as.character(sitelist))),fill=c(2:8)) box() ####################################################################### #Fig2C #Plot instantaneous hazard by var using a random cluster effects model ####################################################################### model.2C<-coxme(Surv(followup.time,death_y)~aids.initiation +(1|site),data=Analysis) model.2C.coef<-model.2C$coef[1] model.2C.se<-sqrt(model.2C$var[1+length(sitelist),1+length(sitelist)]) instanthaz.2C<-InstantaneousHazard2(model.2C) plot(c(0,8),c(-7,1),type="n",xlab="",ylab="",,axes=F,cex.lab=1.8) text(-0.4,0.9,"(C): Gaussian-distributed random cluster effects approach",pos=4) text(-0.4,0.4,paste("HR: ", sprintf("%.2f",exp(model.2C.coef),2), " (",sprintf("%.2f",exp(model.2C.coef - 1.96*model.2C.se),2)," - ",sprintf("%.2f",exp(model.2C.coef + 1.96*model.2C.se),2),")",sep="") ,pos=4) lines(lowess(instanthaz.2C[,2],log(instanthaz.2C[,4]*exp(ranef(model.2C)$site[1]))),col=2) lines(lowess(instanthaz.2C[,2],log(instanthaz.2C[,4]*exp(ranef(model.2C)$site[2]))),col=3) lines(lowess(instanthaz.2C[,2],log(instanthaz.2C[,4]*exp(ranef(model.2C)$site[3]))),col=4) lines(lowess(instanthaz.2C[,2],log(instanthaz.2C[,4]*exp(ranef(model.2C)$site[4]))),col=5) lines(lowess(instanthaz.2C[,2],log(instanthaz.2C[,4]*exp(ranef(model.2C)$site[5]))),col=6) lines(lowess(instanthaz.2C[,2],log(instanthaz.2C[,4]*exp(ranef(model.2C)$site[6]))),col=7) lines(lowess(instanthaz.2C[,2],log(instanthaz.2C[,4]*exp(ranef(model.2C)$site[7]))),col=8) lines(lowess(instanthaz.2C[,2],log(instanthaz.2C[,4]*exp(model.2C$coef[1] + ranef(model.2C)$site[1]))),col=2,lty=2) lines(lowess(instanthaz.2C[,2],log(instanthaz.2C[,4]*exp(model.2C$coef[1] + ranef(model.2C)$site[2]))),col=3,lty=2) lines(lowess(instanthaz.2C[,2],log(instanthaz.2C[,4]*exp(model.2C$coef[1] + ranef(model.2C)$site[3]))),col=4,lty=2) lines(lowess(instanthaz.2C[,2],log(instanthaz.2C[,4]*exp(model.2C$coef[1] + ranef(model.2C)$site[4]))),col=5,lty=2) lines(lowess(instanthaz.2C[,2],log(instanthaz.2C[,4]*exp(model.2C$coef[1] + ranef(model.2C)$site[5]))),col=6,lty=2) lines(lowess(instanthaz.2C[,2],log(instanthaz.2C[,4]*exp(model.2C$coef[1] + ranef(model.2C)$site[6]))),col=7,lty=2) lines(lowess(instanthaz.2C[,2],log(instanthaz.2C[,4]*exp(model.2C$coef[1] + ranef(model.2C)$site[7]))),col=8,lty=2) axis(2,at=log(c(1,.1,.01,.001)),label=c(1,.1,.01,.001),cex.axis=1.6) box() ####################################################################### #Fig2D #Plot instantaneous hazard by var using a random cluster and treatment effects model ####################################################################### model.2D<-coxme(Surv(followup.time,death_y)~aids.initiation +(1 + aids.initiation|site),data=Analysis) model.2D.coef<-model.2D$coef model.2D.se<-sqrt(model.2D$var[1+2*length(sitelist),1+2*length(sitelist)]) instanthaz.2D<-InstantaneousHazard2(model.2D) plot(c(0,8),c(-7,1),type="n",xlab="",ylab="",,axes=F,cex.lab=1.8) text(-0.4,0.9,"(D): Random cluster and treatment effects approach",pos=4) text(-0.4,0.4,paste("HR: ", sprintf("%.2f",exp(model.2D.coef),2), " (",sprintf("%.2f",exp(model.2D.coef - 1.96*model.2D.se),2)," - ",sprintf("%.2f",exp(model.2D.coef + 1.96*model.2D.se),2),")",sep="") ,pos=4) #Function to plot baseline hazards for each cluster RanClustTrtLines<-function(sitelab,color){ lines(lowess(instanthaz.2D[,2],log(instanthaz.2D[,4]*exp(ranef(model.2D)$site[sitelab,1]))),col=color) lines(lowess(instanthaz.2D[,2],log(instanthaz.2D[,4]*exp(model.2D$coef[1] + ranef(model.2D)$site[sitelab,1] + ranef(model.2D)$site[sitelab,2]))),col=color,lty=2) } RanClustTrtLines(1,color=2) #argentina RanClustTrtLines(2,color=3) #brazil RanClustTrtLines(3,color=4) #chile RanClustTrtLines(4,color=5) #haiti RanClustTrtLines(5,color=6) #honduras RanClustTrtLines(6,color=7) #mexico RanClustTrtLines(7,color=8) #peru box() ####################################################################### #Fig2E #Plot instantaneous hazard by var using a strata effects model ####################################################################### model.2E<-coxph(Surv(followup.time,death_y)~aids.initiation+strata(site),data=Analysis) model.2E.coef<-model.2E$coef[1] model.2E.se<-sqrt(model.2E$var[1,1]) instanthaz.2E<-InstantaneousHazard(model.2E) plot(c(0,8),c(-7,1),type="n",xlab="",ylab="",,axes=F,cex.lab=1.8) text(-0.4,0.9,"(E): Stratified approach",pos=4) text(-0.4,0.4,paste("HR: ", sprintf("%.2f",exp(model.2E.coef),2), " (",sprintf("%.2f",exp(model.2E.coef - 1.96*model.2E.se),2)," - ",sprintf("%.2f",exp(model.2E.coef + 1.96*model.2E.se),2),")",sep="") ,pos=4) axis(1,cex.axis=1.6) axis(2,at=log(c(1,.1,.01,.001)),label=c(1,.1,.01,.001),cex.axis=1.6) #Function to plot baseline hazards for each strata StrataLines<-function(sitelab,color){ lines(lowess(instanthaz.2E[instanthaz.2E$strata==sitelab,2],log(instanthaz.2E[instanthaz.2E$strata==sitelab,5])),col=color) lines(lowess(instanthaz.2E[instanthaz.2E$strata==sitelab,2],log(instanthaz.2E[instanthaz.2E$strata==sitelab,5]*exp(model.2E$coef[1]))),col=color,lty=2) } StrataLines("site=argentina",color=2) StrataLines("site=brazil",color=3) StrataLines("site=chile",color=4) StrataLines("site=haiti",color=5) StrataLines("site=honduras",color=6) StrataLines("site=mexico",color=7) StrataLines("site=peru",color=8) box() ####################################################################### #Fig2F #Plot instantaneous hazard by var using a multiple model approach ######################################################################## model.2F.1<-coxph(Surv(followup.time,death_y)~aids.initiation,data=subset(Analysis,site=="argentina")) model.2F.2<-coxph(Surv(followup.time,death_y)~aids.initiation,data=subset(Analysis,site=="brazil")) model.2F.3<-coxph(Surv(followup.time,death_y)~aids.initiation,data=subset(Analysis,site=="chile")) model.2F.4<-coxph(Surv(followup.time,death_y)~aids.initiation,data=subset(Analysis,site=="haiti")) model.2F.5<-coxph(Surv(followup.time,death_y)~aids.initiation,data=subset(Analysis,site=="honduras")) model.2F.6<-coxph(Surv(followup.time,death_y)~aids.initiation,data=subset(Analysis,site=="mexico")) model.2F.7<-coxph(Surv(followup.time,death_y)~aids.initiation,data=subset(Analysis,site=="peru")) #Calculate instanthaz.2F.1<-InstantaneousHazard(model.2F.1) instanthaz.2F.2<-InstantaneousHazard(model.2F.2) instanthaz.2F.3<-InstantaneousHazard(model.2F.3) instanthaz.2F.4<-InstantaneousHazard(model.2F.4) instanthaz.2F.5<-InstantaneousHazard(model.2F.5) instanthaz.2F.6<-InstantaneousHazard(model.2F.6) instanthaz.2F.7<-InstantaneousHazard(model.2F.7) model.2F.coef.vector<-c(model.2F.1$coef,model.2F.2$coef,model.2F.3$coef,model.2F.4$coef,model.2F.5$coef,model.2F.6$coef,model.2F.7$coef) model.2F.se.vector<-c(sqrt(model.2F.1$var[1,1]),sqrt(model.2F.2$var[1,1]),sqrt(model.2F.3$var[1,1]),sqrt(model.2F.4$var[1,1]),sqrt(model.2F.5$var[1,1]),sqrt(model.2F.6$var[1,1]),sqrt(model.2F.7$var[1,1])) model.2F.coef<-metagen(model.2F.coef.vector, model.2F.se.vector, sm="HR")$TE.random model.2F.se<-metagen(model.2F.coef.vector, model.2F.se.vector, sm="HR")$seTE.random plot(c(0,8),c(-7,1),type="n",xlab="",ylab="",,axes=F,cex.lab=1.8) text(-0.4,0.9,"(F): Multiple model approach",pos=4) text(-0.4,0.4,paste("HR: ", sprintf("%.2f",exp(model.2F.coef),2), " (",sprintf("%.2f",exp(model.2F.coef - 1.96*model.2F.se),2)," - ",sprintf("%.2f",exp(model.2F.coef + 1.96*model.2F.se),2),")",sep="") ,pos=4) axis(1,cex.axis=1.6) lines(lowess(instanthaz.2F.1[,2],log(instanthaz.2F.1[,4])),col=2) lines(lowess(instanthaz.2F.2[,2],log(instanthaz.2F.2[,4])),col=3) lines(lowess(instanthaz.2F.3[,2],log(instanthaz.2F.3[,4])),col=4) lines(lowess(instanthaz.2F.4[,2],log(instanthaz.2F.4[,4])),col=5) lines(lowess(instanthaz.2F.5[,2],log(instanthaz.2F.5[,4])),col=6) lines(lowess(instanthaz.2F.6[,2],log(instanthaz.2F.6[,4])),col=7) lines(lowess(instanthaz.2F.7[,2],log(instanthaz.2F.7[,4])),col=8) lines(lowess(instanthaz.2F.1[,2],log(instanthaz.2F.1[,4]*exp(model.2F.1$coef[1]))),col=2,lty=2) lines(lowess(instanthaz.2F.2[,2],log(instanthaz.2F.2[,4]*exp(model.2F.2$coef[1]))),col=3,lty=2) lines(lowess(instanthaz.2F.3[,2],log(instanthaz.2F.3[,4]*exp(model.2F.3$coef[1]))),col=4,lty=2) lines(lowess(instanthaz.2F.4[,2],log(instanthaz.2F.4[,4]*exp(model.2F.4$coef[1]))),col=5,lty=2) lines(lowess(instanthaz.2F.5[,2],log(instanthaz.2F.5[,4]*exp(model.2F.5$coef[1]))),col=6,lty=2) lines(lowess(instanthaz.2F.6[,2],log(instanthaz.2F.6[,4]*exp(model.2F.6$coef[1]))),col=7,lty=2) lines(lowess(instanthaz.2F.7[,2],log(instanthaz.2F.7[,4]*exp(model.2F.7$coef[1]))),col=8,lty=2) box() mtext('Estimated hazard of death (in years)', side=2, outer=TRUE,line=4,cex=2) mtext('Years from start of HAART', side=1, outer=TRUE,line=4,cex=2) dev.off() #################################################################### #Generate composite summary table of results #################################################################### ResultsTable<-function(imputed.data,modified,modstrategy){ number.models<-ifelse(modstrategy <4, 8,6) logHR.all<-selogHR.all<-array(NA,c(M,number.models,3)) for (p in 1:M) { model.one<-model.two<-model.three<-model.four<-model.five<-model.six<-model.seven<-model.eight<-NULL model.eight.age<-model.eight.male<-model.eight.aids.initiation<-NULL d.imp<-data.frame(mi.data.frame(imputed.data,m=p)) #Switch sites for sensitivity analysis! if (modified=="modified" & modstrategy==1) d.imp$site<-Analysis$siteswitch1 #Completely random switch if (modified=="modified" & modstrategy==2) d.imp$site<-Analysis$siteswitch2 #Switch based on baseline hazards if (modified=="modified" & modstrategy==.2) d.imp$site<-Analysis$siteswitch22 #Switch based on baseline hazards if (modified=="modified" & modstrategy==3) d.imp$site<-Analysis$siteswitch3 #Switch based on HR delta if (modified=="modified" & modstrategy==4.1) d.imp$site<-Analysis$siteswitch.size50 #Switch based on subsets of 50 if (modified=="modified" & modstrategy==4.2) d.imp$site<-Analysis$siteswitch.size100 #Switch based on subsets of 100 if (modified=="modified" & modstrategy==4.3) d.imp$site<-Analysis$siteswitch.size200 #Switch based on subsets of 200 if (modified=="modified" & modstrategy==4.4) d.imp$site<-Analysis$siteswitch.size500 #Switch based on subsets of 500 #modstrategy does not matter for original analysis imp.censoring.data<-with(d.imp,Surv(exp(log.followup.time), death_y)) sitelist<-sort(unique(d.imp$site)) #Approach one: Crude, unadjusted for site model.one<-coxph(imp.censoring.data~age +rcs(sqrt.nadirCD4,4) + male+rcs(Initiation.Year,4) + ARVclass + aids.initiation,data=d.imp) #Approach two: Use 'cluster' variable in R model.two<-coxph(imp.censoring.data~age +rcs(sqrt.nadirCD4,4) + male+rcs(Initiation.Year,4) + (ARVclass) + aids.initiation + cluster(site),data=d.imp) #Approach three: Fixed effect -- include each site as indicator variable model.three<-coxph(imp.censoring.data~age +rcs(sqrt.nadirCD4,4) + male+rcs(Initiation.Year,4) + ARVclass + aids.initiation +site,data=d.imp) #Approach four: Use 'frailty' variable in R model.four<-coxph(imp.censoring.data~age +rcs(sqrt.nadirCD4,4)+ male+rcs(Initiation.Year,4) + ARVclass + aids.initiation + frailty(site),data=d.imp) #Approach five: Use 'coxme' for mixed effect model #Gaussian Random Effects for coxme model.five<-coxme(imp.censoring.data~age + rcs(sqrt.nadirCD4,4) + male+rcs(Initiation.Year,4) + ARVclass +aids.initiation +(1|site),data=d.imp) #Approach seven: Use 'strata' variable in R model.seven<-coxph(imp.censoring.data~age +rcs(sqrt.nadirCD4,4) + male+rcs(Initiation.Year,4)+ARVclass + aids.initiation + strata(site),data=d.imp) int<-4 #number of treatment random effects + 1 for intercept #Can omit some models when we do subset analysis because the primary comparisons are fixed v random effects if (modstrategy<4) { #Approach six: Random intercept (site) and slopes model model.six<-coxme(imp.censoring.data~age + rcs(sqrt.nadirCD4,4) + male+rcs(Initiation.Year,4) + ARVclass + aids.initiation +(1|site) + (age|site) + (male|site) +(aids.initiation| site),data=d.imp) #Run meta analysis for each variable of interest model.eight.age<-MetaVar(d.imp,"age",modified) model.eight.male<-MetaVar(d.imp,"male",modified) model.eight.aids.initiation<-MetaVar(d.imp,"aids.initiation",modified) } #Want to output results for three variables (age, sex, and ADE) #Later, we extract values from matrices and vectors #Following commands help us identify the row/columns for these three variables modelvars<-names(model.one$coef) r<-data.frame(modelvars,1:length(modelvars)) varnumber<-r[r$modelvars=="age" |r$modelvars=="male" | r$modelvars=="aids.initiation",2] #log HRs for variable of interest logHR.mod1<-model.one$coef[varnumber] logHR.mod2<-model.two$coef[varnumber] logHR.mod3<-model.three$coef[varnumber] logHR.mod4<-model.four$coef[varnumber] logHR.mod5<-model.five$coef[varnumber] logHR.mod6<-model.six$coef[varnumber] logHR.mod7<-model.seven$coef[varnumber] logHR.mod8<-c(model.eight.age$TE.random,model.eight.male$TE.random,model.eight.aids.initiation$TE.random) #SEs of log HR for age selogHR.mod1<-sqrt(diag(model.one$var[varnumber,varnumber])) selogHR.mod2<-sqrt(diag(model.two$var[varnumber,varnumber])) selogHR.mod3<-sqrt(diag(model.three$var[varnumber,varnumber])) selogHR.mod4<-sqrt(diag(model.four$var[varnumber,varnumber])) selogHR.mod5<-sqrt(diag(model.five$var[varnumber+length(sitelist),varnumber+length(sitelist)])) selogHR.mod6<-sqrt(diag(model.six$var[varnumber+int*length(sitelist),varnumber+int*length(sitelist)])) selogHR.mod7<-sqrt(diag(model.seven$var[varnumber,varnumber])) selogHR.mod8<-c(model.eight.age$seTE.random,model.eight.male$seTE.random,model.eight.aids.initiation$seTE.random) #Special case for tables that do not require models 6 or 8 if (modstrategy>4) selogHR.mod6<-NULL #Combine log HRs/HRs logHR.all[p,,]<-rbind(logHR.mod1,logHR.mod2,logHR.mod3,logHR.mod4,logHR.mod5,logHR.mod6,logHR.mod7,logHR.mod8) selogHR.all[p,,]<-rbind(selogHR.mod1,selogHR.mod2,selogHR.mod3,selogHR.mod4,selogHR.mod5,selogHR.mod6,selogHR.mod7,selogHR.mod8) } logHR<-colMeans(logHR.all) selogHR<-matrix(NA,number.models,3) for (j in 1:length(varnumber)){ selogHR[,j]<-sqrt(apply(selogHR.all[,,j]^2,2,mean)+(M+1)*apply(logHR.all[,,j],2,var)/M) } #Create output table #Combine SE of log HRs, create lower upper 95CI CIs using asymptotics output<-array(NA,c(3,number.models,3)) output[1,,]<-exp(logHR) output[2,,]<-exp(logHR-1.96*selogHR) output[3,,]<-exp(logHR+1.96*selogHR) if (modstrategy <4) colnames(output)<-c("Crude","Cluster","Fixed Effects","Frailty","CoxME (RI)","CoxME (RIRS)","Strata","Meta") if (modstrategy >4) colnames(output)<-c("Crude","Cluster","Fixed Effects","Frailty","CoxME (RI)","Strata") rownames(output)<-c("HR","lower 95CI","upper 95CI") return(output) } #Run Results Generation Function PrimaryResults<-ResultsTable(IMP,"truth",0) #Generate separate table for AIDS variable AIDS.SummaryTable<-t(PrimaryResults[,,3]) #################################################################### #Set up sensitivity analysis #Generate 'new' sites via sampling based upon follow-up time, event, and AIDS status #Keep sample size same for each site #################################################################### #Function to randomly shuffle cluster assignment for subgroups of patients ShuffleFn<-function(mat,modtype,cluster1,cluster2,column,size){ tempvec<-cbind(Analysis[mat[,column]==1,], sample(c(rep(cluster1,sum(mat[,column])-size),rep(cluster2,size)))) colnames(tempvec)<-c(colnames(Analysis),modtype) return(tempvec) } ######################################################### #Strategy 1: Same sample size per site, but completely random shuffling ######################################################### set.seed(455) Analysis$siteswitch1<-sample(Analysis$site) ######################################################### #Strategy 2: Alter baseline hazard functions for each site ######################################################### set.seed(455) subgroup.1<-as.numeric(with(Analysis,death_y==1&followup.time<200 & site=="brazil" )) subgroup.2<-as.numeric(with(Analysis,death_y==0&followup.time>2250 & site=="brazil" )) subgroup.3<-as.numeric(with(Analysis,death_y==0&followup.time<1500 & site=="honduras" )) subgroup.4<-as.numeric(with(Analysis,death_y==1&followup.time>1000 & site=="honduras" )) subgroup.5<-as.numeric(with(Analysis,death_y==1&followup.time<1000 & followup.time>200 & site=="brazil" )) subgroup.6<-as.numeric(with(Analysis,death_y==0&followup.time<2250 & followup.time>1250 & site=="brazil")) subgroup.7<-as.numeric(with(Analysis,death_y==1&followup.time>1000 & site=="haiti" )) subgroup.8<-as.numeric(with(Analysis,death_y==0&followup.time<300 & site=="haiti" )) subgroup.9<-as.numeric(with(Analysis,death_y==1&followup.time<500 & site=="peru" )) subgroup.10<-as.numeric(with(Analysis,death_y==0&followup.time>1500 & site=="honduras" )) subgroup.matrix<-cbind(subgroup.1,subgroup.2,subgroup.3,subgroup.4,subgroup.5,subgroup.6,subgroup.7,subgroup.8,subgroup.9,subgroup.10) subgroup.11<-1-apply(subgroup.matrix,1,sum) #Add column with indicator for all patients not in other subgroups subgroup.matrix<-cbind(subgroup.matrix,subgroup.11) temp1<-ShuffleFn(subgroup.matrix,"siteswitch2","brazil","honduras",1,60) temp2<-ShuffleFn(subgroup.matrix,"siteswitch2","brazil","honduras",2,290) temp3<-ShuffleFn(subgroup.matrix,"siteswitch2","honduras","brazil",3,330) temp4<-ShuffleFn(subgroup.matrix,"siteswitch2","honduras","brazil",4,20) temp5<-ShuffleFn(subgroup.matrix,"siteswitch2","brazil","haiti",5,45) temp6<-ShuffleFn(subgroup.matrix,"siteswitch2","brazil","haiti",6,440) temp7<-ShuffleFn(subgroup.matrix,"siteswitch2","haiti","brazil",7,105) temp8<-ShuffleFn(subgroup.matrix,"siteswitch2","haiti","brazil",8,380) temp9<-ShuffleFn(subgroup.matrix,"siteswitch2","peru","honduras",9,75) temp10<-ShuffleFn(subgroup.matrix,"siteswitch2","honduras","peru",10,75) temp11<-cbind(Analysis[subgroup.matrix[,11]==1,], Analysis[subgroup.matrix[,11]==1,"site"]) colnames(temp11)<-c(colnames(Analysis),"siteswitch2") shuffled.Analysis<-rbind(temp1,temp2,temp3,temp4,temp5,temp6,temp7,temp8,temp9,temp10,temp11) Analysis<-shuffled.Analysis[order(shuffled.Analysis$order),] Analysis$siteswitch2<-as.factor(as.character(Analysis$siteswitch2)) ######################################################### #Strategy 3: Try to get different results without more extreme treatment effects ######################################################### set.seed(455) subgroup.1<-as.numeric(with(Analysis,death_y==1& aids.initiation==1 & is.na(aids.initiation)==F & site=="haiti" )) subgroup.2<-as.numeric(with(Analysis,death_y==0&aids.initiation==1 & is.na(aids.initiation)==F & site=="brazil" )) subgroup.matrix<-cbind(subgroup.1,subgroup.2) subgroup.3<-1-apply(subgroup.matrix,1,sum) #Add column with indicator for all patients not in other subgroups subgroup.matrix<-cbind(subgroup.matrix,subgroup.3) if (sum(subgroup.matrix)!=dim(Analysis)[1]) print("Subsetting is not disjoint") temp1<-ShuffleFn(subgroup.matrix,"siteswitch3","haiti","brazil",1,250) temp2<-ShuffleFn(subgroup.matrix,"siteswitch3","brazil","haiti",2,250) temp3<-cbind(Analysis[subgroup.matrix[,3]==1,], Analysis[subgroup.matrix[,3]==1,"site"]) colnames(temp3)<-c(colnames(Analysis),"siteswitch3") shuffled.Analysis<-rbind(temp1,temp2,temp3) Analysis<-shuffled.Analysis[order(shuffled.Analysis$order),] Analysis$siteswitch3<-as.factor(as.character(Analysis$siteswitch3)) ######################################################### #Strategy 4: Increase number of clusters by subsetting each country ######################################################### subgroup.1<-as.numeric(with(Analysis,site=="argentina" )) subgroup.2<-as.numeric(with(Analysis,site=="brazil" )) subgroup.3<-as.numeric(with(Analysis,site=="chile" )) subgroup.4<-as.numeric(with(Analysis,site=="haiti" )) subgroup.5<-as.numeric(with(Analysis,site=="honduras" )) subgroup.6<-as.numeric(with(Analysis,site=="mexico" )) subgroup.7<-as.numeric(with(Analysis,site=="peru" )) subgroup.matrix<-cbind(subgroup.1,subgroup.2,subgroup.3,subgroup.4,subgroup.5,subgroup.6,subgroup.7) subgroup.8<-1-apply(subgroup.matrix,1,sum) #Add column with indicator for all patients not in other subgroups subgroup.matrix<-cbind(subgroup.matrix,subgroup.8) #Function to create subsets for each site SubsetPermutations<-function(subsample,site,newgroupsize){ sitefrequency<-sum(subsample) num.subsets<-floor(sitefrequency/newgroupsize) subgroup.sample<-sample(c(rep(paste(site,1:(num.subsets),sep=""),c(sitefrequency-(num.subsets-1)*newgroupsize,rep(newgroupsize,(num.subsets-1)))))) } SubsetAssignment<-function(subgroup.matrix,recgroupsize){ set.seed(455) shuffled.Analysis<-NULL for (j in 1:dim(subgroup.matrix)[2]){ subsetted.Analysis<-Analysis[subgroup.matrix[,j]==1,] subsetted.site<-subsetted.Analysis[1,"site"] if (j3500 & site=="haiti" )) subgroup.4<-as.numeric(with(Analysis,death_y==1&followup.time>1000 & site=="honduras" )) subgroup.5<-as.numeric(with(Analysis,death_y==1&followup.time<500 & followup.time>100 & site=="haiti" )) subgroup.6<-as.numeric(with(Analysis,death_y==0&followup.time<1000 & followup.time>750 & site=="argentina" )) subgroup.7<-as.numeric(with(Analysis,death_y==1&followup.time>1000 & site=="brazil" )) subgroup.8<-as.numeric(with(Analysis,death_y==0&followup.time<750 & site=="argentina" )) subgroup.9<-as.numeric(with(Analysis,death_y==0&followup.time<1000 & site=="peru" )) subgroup.10<-as.numeric(with(Analysis,death_y==0&followup.time>2500 &followup.time<3500 & site=="haiti" )) subgroup.11<-as.numeric(with(Analysis,death_y==0&followup.time>1500 &followup.time<2500 & site=="haiti" )) subgroup.12<-as.numeric(with(Analysis,death_y==1&followup.time>1000 & site=="chile" )) subgroup.13<-as.numeric(with(Analysis,death_y==0&followup.time<1250 & site=="chile" )) subgroup.matrix<-cbind(subgroup.1,subgroup.2,subgroup.3,subgroup.4,subgroup.5,subgroup.6,subgroup.7,subgroup.8,subgroup.9,subgroup.10,subgroup.11,subgroup.12,subgroup.13) subgroup.14<-1-apply(subgroup.matrix,1,sum) #Add column with indicator for all patients not in other subgroups subgroup.matrix<-cbind(subgroup.matrix,subgroup.14) if (sum(subgroup.matrix)!=dim(Analysis)[1]) print("Subsetting is not disjoint") temp1<-ShuffleFn(subgroup.matrix,"siteswitch22","haiti","honduras",1,285) temp2<-ShuffleFn(subgroup.matrix,"siteswitch22","honduras","haiti",2,365) temp3<-ShuffleFn(subgroup.matrix,"siteswitch22","haiti","honduras",3,100) temp4<-ShuffleFn(subgroup.matrix,"siteswitch22","honduras","haiti",4,20) temp5<-ShuffleFn(subgroup.matrix,"siteswitch22","haiti","argentina",5,220) temp6<-ShuffleFn(subgroup.matrix,"siteswitch22","argentina","brazil",6,50) temp7<-ShuffleFn(subgroup.matrix,"siteswitch22","brazil","haiti",7,50) temp8<-cbind(Analysis[subgroup.matrix[,8]==1,], sample(c(rep("argentina",sum(subgroup.matrix[,8])-170),rep("haiti",170)))) temp8<-ShuffleFn(subgroup.matrix,"siteswitch22","argentina","haiti",8,170) temp9<-ShuffleFn(subgroup.matrix,"siteswitch22","peru","haiti",9,800) temp10<-ShuffleFn(subgroup.matrix,"siteswitch22","haiti","peru",10,800) temp11<-ShuffleFn(subgroup.matrix,"siteswitch22","haiti","chile",11,250) temp12<-ShuffleFn(subgroup.matrix,"siteswitch22","chile","haiti",12,25) temp13<-ShuffleFn(subgroup.matrix,"siteswitch22","chile","haiti",13,225) temp14<-cbind(Analysis[subgroup.matrix[,14]==1,], Analysis[subgroup.matrix[,14]==1,"site"]) colnames(temp14)<-c(colnames(Analysis),"siteswitch22") shuffled.Analysis<-rbind(temp1,temp2,temp3,temp4,temp5,temp6,temp7,temp8,temp9,temp10,temp11,temp12,temp13,temp14) Analysis<-shuffled.Analysis[order(shuffled.Analysis$order),] Analysis$siteswitch22<-as.factor(as.character(Analysis$siteswitch22)) #How many sites changed for modification #2 #Baseline hazards (alt) Change.Table22<-table(Analysis$site,Analysis$siteswitch22) N.change22<-sum(Change.Table22) - sum(diag(Change.Table22)) ModResults.22<-ResultsTable(IMP,"modified",.2) AIDS.SummaryTable.Modified22<-t(ModResults.22[,,3]) pdf(file = paste("Figures/BaselineHazardModifiedFigure3(alt).pdf",sep="")) par(mfrow=c(1,1),mar=c(0,0,0,0),oma=c(6,6,3,3)) ModifiedHazardPlot("siteswitch22","2(alt)") axis(1,cex.axis=1.6) axis(2,at=log(c(1,.1,.01,.001)),label=c(1,.1,.01,.001),cex.axis=1.6) mtext('Estimated hazard of death (in years)', side=2, outer=TRUE,line=4,cex=2) mtext('Years from start of HAART', side=1, outer=TRUE,line=4,cex=2) legend(4.5,1.0,as.factor(sort(capitalize(as.character(sitelist2)))),fill=c(2:8)) dev.off()