######  This file performs the multiple imputation analyses of Cole et al. to data from CNICS+Vanderbilt.
######  At the end of this code, we also read in some of the results from the dynamic marginal
######  structural models of Hernan et al. using the same data, to create comparison figures.


######  Reading in the data
rm(list=ls())
setwd("/Volumes/encrypted/cnics/analyses-2013-Nov")

rf<-read.csv("RiskFactor2.csv")
mort<-read.csv("Mortality2.csv")
med<-read.csv("Medication2.csv")
lab<-read.csv("Lab2.csv")
dia<-read.csv("Diagnosis2.csv")
dems<-read.csv("Demographic2.csv")

dim(dems)
length(unique(dems$StudyId))


##### Looking at ART table first; creating uniform drug abbreviations

head(med)
table(med$MedicationName)
dim(med)
med<-med[med$MedicationName!="ADEFOVIR",]
dim(med)
length(unique(med$StudyId))    

art<-med$MedicationName
art1<-ifelse(art=="ABACAVIR","ABC",
             ifelse(art=="ADEFOVIR","hep B drug",
                    ifelse(art=="AMPRENAVIR","APV",
                           ifelse(art=="ATAZANAVIR","ATZ",
                                  ifelse(art=="ATRIPLA","EFV,FTC,TDF",
                                         ifelse(art=="COMBIVIR","3TC,AZT",
                                                ifelse(art=="COMPLERA","FTC,RPV,TDF",
                                                       ifelse(art=="DARUNAVIR","DRV",
                                                              ifelse(art=="DELAVIRDINE","DLV",
                                                                     ifelse(art=="DIDANOSINE","DDI",
                                                                            ifelse(art=="EFAVIRENZ","EFV",
                                                                                   ifelse(art=="EMTRICITABINE","FTC",
                                                                                          ifelse(art=="ENFUVIRTIDE","INN",
                                                                                                 ifelse(art=="EPZICOM","3TC,AZT",
                                                                                                        ifelse(art=="ETRAVIRINE","ETR",
                                                                                                               ifelse(art=="FOSAMPRENAVIR","FPV", 
                                                                                                                      ifelse(art=="Hx ARV Treatment, Med unknown","unknown ARV",
                                                                                                                             ifelse(art=="Hx PI Treatment, Med unknown","unknown PI",
                                                                                                                                    ifelse(art=="INDINAVIR","IDV",
                                                                                                                                           ifelse(art=="KALETRA","LPV,RTV",
                                                                                                                                                  ifelse(art=="LAMIVUDINE","3TC",
                                                                                                                                                         ifelse(art=="LOPINAVIR","LPV",
                                                                                                                                                                ifelse(art=="MARAVIROC","MRV",
                                                                                                                                                                       ifelse(art=="NELFINAVIR","NFV",
                                                                                                                                                                              ifelse(art=="NEVIRAPINE","NVP",
                                                                                                                                                                                     ifelse(art=="RALTEGRAVIR","RAL",
                                                                                                                                                                                            ifelse(art=="RILPIVIRINE","RPV",
                                                                                                                                                                                                   ifelse(art=="RITONAVIR","RTV",
                                                                                                                                                                                                          ifelse(art=="RITONAVIR BOOSTED","RTV",
                                                                                                                                                                                                                 ifelse(art=="SAQUINAVIR HARD CAP","SQV",
                                                                                                                                                                                                                        ifelse(art=="SAQUINAVIR SOFT GEL","SQV",
                                                                                                                                                                                                                               ifelse(art=="SAQUINAVIR UNSPECIFIED","SQV",
                                                                                                                                                                                                                                      ifelse(art=="STAVUDINE","D4T",
                                                                                                                                                                                                                                             ifelse(art=="STRIBILD","EVG,FTC,TDF",
                                                                                                                                                                                                                                                    ifelse(art=="TENOFOVIR","TDF",
                                                                                                                                                                                                                                                           ifelse(art=="TIPRANAVIR","TPV",
                                                                                                                                                                                                                                                                  ifelse(art=="TRIZIVIR","3TC,ABC,AZT",
                                                                                                                                                                                                                                                                         ifelse(art=="TRUVADA","FTC,3TC",
                                                                                                                                                                                                                                                                                ifelse(art=="ZALCITABINE","DDC",
                                                                                                                                                                                                                                                                                       ifelse(art=="ZIDOVUDINE","AZT",as.character(art)))))))))))))))))))))))))))))))))))))))))


####  Many people only have data up to the month-level in accuracy; this makes me do the following work-around stuff.

start.date1<-med$StartDate

stuff<-matrix(unlist(strsplit(as.character(start.date1),'-')),ncol=3,byrow=TRUE)
start.year<-stuff[,1]
start.month<-stuff[,2]
start.day<-stuff[,3]

start.day1<-ifelse(start.day=="00","15",start.day)
start.month1<-ifelse(start.month=="00","07",start.month)
start.date.approx<-ifelse(start.month=="00","y",
                          ifelse(start.month!="00"&start.day=="00","m","d"))

start.date<-NULL
for (i in 1:length(start.date1)) {
  start.date[i]<-paste(paste(start.year[i],start.month1[i],sep="-",collapse=""),start.day1[i],sep="-",collapse="")
}
#######  This new date variable includes an approximation variable next to it 
#######  (y=known up to year, m=known up to month, d=known up to day/no approximation, n=unknown date)

id<-med$StudyId
unique.id<-unique(id)
start.date<-as.Date(start.date)

####### Finding date of first ARV regimen
locate1<-NULL
for (i in 1:length(unique.id)) {
  locate1[i]<-which(id==unique.id[i]&start.date==min(start.date[id==unique.id[i]],na.rm=TRUE))[1]
}
init.date<-as.Date(start.date[locate1])
init.date.approx<-start.date.approx[locate1]

####  I should exclude those with init.date.approx=="y" because I only know start date up to the year.  This is done later.


####   Finding initial regimens
reg<-NULL
for (i in 1:length(unique.id)) {
  reg[i]<-paste(art1[id==unique.id[i]&start.date==init.date[i]],collapse=",",sep="")
}
init.reg<-sapply(lapply(lapply(strsplit(as.character(reg),','),unique),sort),FUN=paste,collapse=",")

####  Checking to see if initial regimen is HAART.  This function excludes those that are obviously not HAART.
reg.haart<-function(init.reg) {
  haart<-ifelse(init.reg=="3TC"|
                  init.reg=="3TC,ABC"|
                  init.reg=="3TC,AZT"|
                  init.reg=="3TC,D4T"|
                  init.reg=="3TC,EFV"|
                  init.reg=="3TC,FTC"|
                  init.reg=="3TC,TDF"|
                  init.reg=="ATZ"|
                  init.reg=="ATZ,RTV"|
                  init.reg=="AZT,IDV"|
                  init.reg=="D4T,DDI"|
                  init.reg=="D4T,FTC"|
                  init.reg=="DDI,EFV"|
                  init.reg=="DDI,IDV"|
                  init.reg=="DDI,TDF"|
                  init.reg=="DLV,EFV"|
                  init.reg=="DLV,NFV"|
                  init.reg=="DRV"|
                  init.reg=="DRV,RTV"|
                  init.reg=="EFV"|
                  init.reg=="EFV,FTC"|
                  init.reg=="EFV,TDF"|
                  init.reg=="FPV,RTV"|
                  init.reg=="FTC"|
                  init.reg=="IDV"|
                  init.reg=="LPV,RTV"|
                  init.reg=="MRV"|
                  init.reg=="NVP"|
                  init.reg=="RAL"|
                  init.reg=="RTV"|
                  init.reg=="unknown ARV"|
                  init.reg=="3TC,DDI"|
                  init.reg=="3TC,NVP"|
                  init.reg=="3TC,NFV"|
                  init.reg=="3TC,RAL"|
                  init.reg=="ABC"|
                  init.reg=="ATZ,EFV"|
                  init.reg=="AZT"|
                  init.reg=="D4T,EFV"|
                  init.reg=="D4T,NFV"|
                  init.reg=="DDI"|
                  init.reg=="ETR"|
                  init.reg=="ETR,RAL"|
                  init.reg=="NFV"|
                  init.reg=="RTV,SQV"|
                  init.reg=="TDF"|
                  init.reg=="D4T,NVP"|
                  init.reg=="D4T"|
                  init.reg=="ABC,NFV"|
                  init.reg=="",0,1)
  haart
}

haart1<-reg.haart(init.reg)


#####  Some people who started a non-HAART regimen may have started a HAART regimen shortly thereafter.  For example,
#####  sometimes people are put on a single drug at first and then receive the full regimen after one week.  The 
#####  following code is capturing these people, using a <32 day cutoff.  I end up including roughly 20 more patients.
locate2<-rep(NA,length(unique.id))
for (i in 1:length(unique.id)) {
  if (haart1[i]==0 & length(unique(start.date[id==unique.id[i]]))>1) {
    locate2[i]<-which(id==unique.id[i]&start.date==min(start.date[id==unique.id[i]&start.date!=init.date[i]],na.rm=TRUE))[1]
  }
}
second.date<-as.Date(start.date[locate2])
second.date.approx<-start.date.approx[locate2]
diff.dates<-as.numeric(second.date-init.date)

stop.date1<-med$EndDate

stuff<-matrix(unlist(strsplit(as.character(stop.date1),'-')),ncol=3,byrow=TRUE)
stop.year<-stuff[,1]
stop.month<-stuff[,2]
stop.day<-stuff[,3]

stop.day1<-ifelse(stop.day=="00","15",stop.day)
stop.month1<-ifelse(stop.month=="00","07",stop.month)
stop.year1<-ifelse(stop.year=="0000","3000",stop.year)
stop.date.approx<-ifelse(stop.year=="0000","n",
                         ifelse(stop.year!="0000"&stop.month=="00","y",
                                ifelse(stop.year!="0000"&stop.month!="00"&stop.day=="00","m","d")))

stop.date<-NULL
for (i in 1:length(stop.date1)) {
  stop.date[i]<-paste(paste(stop.year1[i],stop.month1[i],sep="-",collapse=""),stop.day1[i],sep="-",collapse="")
}

init.stop.date<-as.Date(stop.date[locate1])
init.stop.date.approx<-stop.date.approx[locate1]

reg2<-rep(NA,length(unique.id))
for (i in 1:length(unique.id)) {
  if (!is.na(second.date[i])) {
    reg2[i]<-paste(art1[id==unique.id[i]&start.date==second.date[i]],collapse=",",sep="")
  }
}
second.reg<-sapply(lapply(lapply(strsplit(as.character(reg2),','),unique),sort),FUN=paste,collapse=",")

table(second.reg[diff.dates<32&second.date.approx!="y"])
table(second.reg[diff.dates<32&second.date.approx!="y"&init.stop.date>second.date])
table(second.reg[diff.dates<32&second.date.approx!="y"&init.stop.date<second.date])


junk<-ifelse(!is.na(second.reg)&!is.na(diff.dates)&diff.dates<32&second.date.approx!="y"&init.stop.date>second.date&init.stop.date.approx!="y",1,0)
junk1<-rep(NA,length(unique.id))
for (i in 1:length(unique.id)) {
  if (junk[i]==1) {
    junk1[i]<-paste(init.reg[i],second.reg[i],collapse="",sep=",")
  }
}
reg1and2<-sapply(lapply(lapply(strsplit(as.character(junk1),','),unique),sort),FUN=paste,collapse=",")


haart<-ifelse(haart1==0&reg.haart(reg1and2)==1,1,haart1)


##### Persons to include in the HAART-initiators analysis are those who start HAART and whose date of initiation is known at least up to the month.

include<-ifelse(haart==1&init.date.approx!="y",1,0)

#### Creating the antiretroviral table
tbl.art<-data.frame(id=unique.id[include==1],
                    init.date=init.date[include==1],
                    init.date.approx=init.date.approx[include==1])


#### Merging with demographics table
d<-merge(tbl.art,dems,by.x="id",by.y="StudyId",all.x=TRUE)


#####  CD4 counts

### Only including those in tbl.art at this point.
lab.cd4<-lab[lab$TestName=="CD4 cell absolute" & lab$StudyId %in% d$id,]

trash<-matrix(unlist(strsplit(as.character(lab.cd4$ResultDate),' ')),ncol=2,byrow=TRUE)
cd4.d<-trash[,1]
cd4.date1<-as.Date(cd4.d)

stuff<-matrix(unlist(strsplit(as.character(cd4.d),'-')),ncol=3,byrow=TRUE)
cd4.year<-stuff[,1]
cd4.month<-stuff[,2]
cd4.day<-stuff[,3]

cd4.day1<-ifelse(cd4.day=="00","15",cd4.day)
cd4.month1<-ifelse(cd4.month=="00","07",cd4.month)
cd4.date.approx<-ifelse(cd4.month=="00","y",
                        ifelse(cd4.month!="00"&cd4.day=="00","m","d"))

cd4.date<-NULL
for (i in 1:length(cd4.date1)) {
  cd4.date[i]<-paste(paste(cd4.year[i],cd4.month1[i],sep="-",collapse=""),cd4.day1[i],sep="-",collapse="")
}
cd4.date<-as.Date(cd4.date)


head(lab.cd4)
table(lab.cd4$Units)   ## all units are the same
table(lab.cd4$Interpretation)  ### I'm ignoring Interpretation, DataSource, and Historical for now.
table(lab.cd4$DataSource)
table(lab.cd4$Historical)

### The following CD4 measurements are not exact.  
unique(lab.cd4$Result[which(is.na(as.numeric(as.character(lab.cd4$Result))))])

### We'll replace the inexact CD4 measurements with the following:
cd4<-lab.cd4$Result

cd4.new<-as.character(ifelse(cd4=="330 CELLS",330,
                             ifelse(cd4=="150 (?)","150",
                                    ifelse(cd4=="~500","500",
                                           ifelse(cd4=="<200","190",
                                                  ifelse(cd4=="<10","5",
                                                         ifelse(cd4=="<1","0",
                                                                ifelse(cd4=="<150","140",
                                                                       ifelse(cd4=="<20","10",
                                                                              ifelse(cd4=="<500","490",
                                                                                     ifelse(cd4==">500","510",
                                                                                            ifelse(cd4==">700","710",
                                                                                                   ifelse(cd4=="~550","550",
                                                                                                          ifelse(cd4=="~71","71",
                                                                                                                 as.character(cd4)))))))))))))))
cd4.new<-as.numeric(cd4.new)


#####  Now merging CD4 data with ART data and demographics

d.cd4<-data.frame(id=lab.cd4$StudyId,cd4.date,cd4.date.approx,cd4=cd4.new)

d1<-merge(d.cd4,d,by="id",all.x=TRUE)


##### Saving stuff so I don't have to re-run all of the above each time
save(d1,d,file="cnics-cole-dems-art-cd4.Rda")


#######  Starting with a clean slate
rm(list=ls())
load("cnics-cole-dems-art-cd4.Rda")

d1<-d1[!is.na(d1$cd4),]

#####  Now defining baseline CD4

window.pre<-180
window.post<-0

unique.id<-unique(d1$id)

locate<-rep(NA,length(unique.id))
for (i in 1:length(unique.id)) {
  if (length(d1$cd4[d1$id==unique.id[i] & d1$cd4.date<=d1$init.date & d1$cd4.date>=d1$init.date-window.pre])>0) {
    locate[i]<-which(d1$cd4.date==max(d1$cd4.date[d1$id==unique.id[i] & d1$cd4.date<=d1$init.date & d1$cd4.date>=d1$init.date-window.pre])&
                       d1$id==unique.id[i] & d1$cd4.date<=d1$init.date & d1$cd4.date>=d1$init.date-window.pre)[1]
  }
}
cd4.baseline<-d1$cd4[locate]
cd4.date<-d1$cd4.date[locate]

d.cd4b<-data.frame(id=unique.id,cd4=cd4.baseline,cd4.date)

d<-merge(d,d.cd4b,by="id",all.x=TRUE)


#####  Now let's look at the clinical endpoint data

mort<-read.csv("Mortality2.csv")

##### Date of death is only known up to the month; therefore I will set all death dates as on the 15th.

death.date1<-mort$DeathDate

stuff<-matrix(unlist(strsplit(as.character(death.date1),'-')),ncol=3,byrow=TRUE)
death.year<-stuff[,1]
death.month<-stuff[,2]
death.day<-stuff[,3]


death.day1<-ifelse(death.day=="00","15",death.day)
death.month1<-ifelse(death.month=="00","07",death.month)
death.date.approx<-ifelse(death.month=="00","y",
                          ifelse(death.month!="00"&death.day=="00","m","d"))

death.date<-NULL
for (i in 1:length(death.date1)) {
  death.date[i]<-paste(paste(death.year[i],death.month1[i],sep="-",collapse=""),death.day1[i],sep="-",collapse="")
}

mort1<-data.frame(id=mort$StudyId,death=1,death.date,death.date.approx)


##### Now clinical diagnoses

dia<-read.csv("Diagnosis2.csv")


clin.date1<-dia$DiagnosisDate

stuff<-matrix(unlist(strsplit(as.character(clin.date1),'-')),ncol=3,byrow=TRUE)
clin.year<-stuff[,1]
clin.month<-stuff[,2]
clin.day<-stuff[,3]

clin.day1<-ifelse(clin.day=="00","15",clin.day)
clin.month1<-ifelse(clin.month=="00","07",clin.month)
clin.year1<-ifelse(clin.year=="0000","3000",clin.year)
clin.date.approx<-ifelse(clin.year=="0000","n",
                         ifelse(clin.year!="0000"&clin.month=="00","y",
                                ifelse(clin.year!="0000"&clin.month!="00"&clin.day=="00","m","d")))

clin.date<-NULL
for (i in 1:length(clin.date1)) {
  clin.date[i]<-paste(paste(clin.year[i],clin.month1[i],sep="-",collapse=""),clin.day1[i],sep="-",collapse="")
}

clin<-data.frame(id=dia$StudyId,ade=1,ade.date=clin.date,ade.date.approx=clin.date.approx)


######  For the Cole analysis, I need to know who has had an ADE prior to or after HAART.

d2<-merge(d,clin,by="id")

id<-d2$id
ade.date<-as.Date(d2$ade.date)
init.date<-as.Date(d2$init.date)
ade.date.approx<-d2$ade.date.approx
unique.id<-unique(id)

prior.ade<-ifelse(ade.date<=init.date,1,0)
locate.ade<-prior.ade1<-rep(NA,length(unique.id))
for (i in 1:length(unique.id)) {
  if (sum(ade.date[id==unique.id[i]]>init.date[id==unique.id[i]])>0) {
    locate.ade[i]<-which(ade.date==min(ade.date[id==unique.id[i]&ade.date>init.date])&id==unique.id[i])[1] 
  }
  prior.ade1[i]<-ifelse(sum(prior.ade[id==unique.id[i]])>0,1,0)
}
ade.date.post.haart<-ade.date[locate.ade]
ade.date.post.haart.approx<-ade.date.approx[locate.ade]

ade<-ifelse(!is.na(ade.date.post.haart),1,0)

clin.events<-data.frame(id=unique.id,prior.ade=prior.ade1,ade,ade.date=ade.date.post.haart,ade.date.approx=ade.date.post.haart.approx)


#######  Now I'm merging clinical events and mortality datasets to the main dataset.

d.new1<-merge(d,clin.events,by="id",all.x=TRUE)
d.new2<-merge(d.new1,mort1,by="id",all.x=TRUE)

d.new2$prior.ade<-ifelse(is.na(d.new2$prior.ade),0,d.new2$prior.ade)  
d.new2$ade<-ifelse(is.na(d.new2$ade),0,d.new2$ade)  
d.new2$death<-ifelse(is.na(d.new2$death),0,d.new2$death)  

d.new2$event<-ifelse(d.new2$ade==1|d.new2$death==1,1,0)
d.new2$event.date<-as.Date(ifelse(d.new2$ade==1,as.character(d.new2$ade.date),as.character(d.new2$death.date)))

d<-d.new2


##### Saving stuff so I don't have to re-run all of the above each time
save(d,file="cnics-cole-stuff2.Rda")


#######  Starting with a clean slate
rm(list=ls())
load("cnics-cole-stuff2.Rda")


d$male<-ifelse(d$PresentSex=="Female",0,1)
d$PresentSex<-d$BirthSex<-d$Transgendered<-NULL

##### Creating an artificial date of birth as BirthYear-07-15.
dob<-NULL
for (i in 1:length(d$BirthYear)){
  dob[i]<-paste(d$BirthYear[i],"-07-15",sep="",collapse="")
}
d$dob<-as.Date(dob)

d$age.haart<-as.numeric(d$init.date-d$dob)/365.25


#### LTFU
#####  Now computing date of last visit


med<-read.csv("Medication2.csv")
lab<-read.csv("Lab2.csv")

unique.id<-unique(d$id)


date.calc<-function(x.date1) {
  stuff<-matrix(unlist(strsplit(as.character(x.date1),'-')),ncol=3,byrow=TRUE)
  x.year<-stuff[,1]
  x.month<-stuff[,2]
  x.day<-stuff[,3]
  
  x.day1<-ifelse(x.day=="00","15",x.day)
  x.month1<-ifelse(x.month=="00","07",x.month)
  x.year1<-ifelse(x.year=="0000","1900",x.year)
  x.date.approx<-ifelse(x.year=="0000","n",
                        ifelse(x.year!="0000"&x.month=="00","y",
                               ifelse(x.year!="0000"&x.month!="00"&x.day=="00","m","d")))
  x.date<-NULL
  for (i in 1:length(x.date1)) {
    x.date[i]<-paste(paste(x.year1[i],x.month1[i],sep="-",collapse=""),x.day1[i],sep="-",collapse="")
  }
  x.date<-as.Date(x.date)
  x<-data.frame(x.date,x.date.approx)
  return(x)
}


start.dates<-cbind(med$StudyId,date.calc(med$StartDate))
end.dates<-cbind(med$StudyId,date.calc(med$EndDate))

trash<-matrix(unlist(strsplit(as.character(lab$ResultDate),' ')),ncol=2,byrow=TRUE)
lab.d<-trash[,1]
lab.dates<-cbind(lab$StudyId,date.calc(lab.d))

locate.max.s<-locate.max.e<-locate.max.lab<-NULL
for (i in 1:length(unique.id)) {
  locate.max.s[i]<-which(start.dates[,1]==unique.id[i]&start.dates$x.date==max(start.dates$x.date[start.dates[,1]==unique.id[i]]))[1]
  locate.max.e[i]<-which(end.dates[,1]==unique.id[i]&end.dates$x.date==max(end.dates$x.date[end.dates[,1]==unique.id[i]]))[1]
  locate.max.lab[i]<-which(lab.dates[,1]==unique.id[i]&lab.dates$x.date==max(lab.dates$x.date[lab.dates[,1]==unique.id[i]]))[1]
}
start.date.max<-start.dates$x.date[locate.max.s]
end.date.max<-end.dates$x.date[locate.max.e]
lab.date.max<-lab.dates$x.date[locate.max.lab]

last.date<-pmax(start.date.max,end.date.max,lab.date.max,na.rm=TRUE)
last.dates<-data.frame(id=unique.id,last.date)

d<-merge(d,last.dates,by="id")


d$age.cd4<-as.numeric(d$cd4.date-d$dob)/365.25

##### Saving stuff so I don't have to re-run all of the above each time
save(d,lab.dates,end.dates,start.dates,file="cnics-cole-stuff3.Rda")


#######  Starting with a clean slate
rm(list=ls())
load("cnics-cole-stuff3.Rda")


####  Computing prospective LTFU with all data  (2014-04-04)
####   I decided not to incorporate this into the analyses (2014-06-25), because it is difficult to 
####  properly compute prospective definitions of LTFU with my implementation of the Hernan approach,
####  and I decided that the effort was not worth the gain.  However, I have kept the code here; it 
####  can be commented out.

mort<-read.csv("Mortality2.csv")
dia<-read.csv("Diagnosis2.csv")

mort.dates<-mort[,c("StudyId","DeathDate")]
mort.dates$x.date<-mort.dates$DeathDate
mort.dates$x.date.approx<-"d"
mort.dates$DeathDate<-NULL

dia.dates<-dia[,c("StudyId","DiagnosisDate")]
dia.dates$x.date<-dia.dates$DiagnosisDate
dia.dates$x.date.approx<-"d"
dia.dates$DiagnosisDate<-NULL

end.dates1<-data.frame(StudyId=end.dates$'med$StudyId',x.date=as.character(end.dates$x.date),x.date.approx=as.character(end.dates$x.date.approx))
lab.dates1<-data.frame(StudyId=lab.dates$'lab$StudyId',x.date=as.character(lab.dates$x.date),x.date.approx=as.character(lab.dates$x.date.approx))
start.dates1<-data.frame(StudyId=start.dates$'med$StudyId',x.date=as.character(start.dates$x.date),x.date.approx=as.character(start.dates$x.date.approx))

all.dates2<-rbind(dia.dates,mort.dates,end.dates1,lab.dates1,start.dates1)
all.dates1<-merge(all.dates2,d[,c("id","init.date")],by.x="StudyId",by.y="id",all.x=TRUE)
all.dates1<-all.dates1[!is.na(all.dates1$init.date),]
all.dates1$x.date<-as.Date(all.dates1$x.date)
ord<-order(all.dates1$StudyId,all.dates1$x.date)
all.dates<-all.dates1[ord,]

all.dates.post.art<-all.dates[all.dates$x.date>=all.dates$init.date,]

stuff0<-as.numeric(c(all.dates.post.art$x.date,NA))
stuff1<-as.numeric(c(NA,all.dates.post.art$x.date))
diff1<-stuff0-stuff1
diff<-diff1[-1]
junk<-c(duplicated(all.dates.post.art$StudyId)[-1],NA)
all.dates.post.art$diff<-diff
all.dates.post.art$diff<-with(all.dates.post.art, ifelse(junk,diff,NA))
all.dates.post.art$flag<-with(all.dates.post.art, ifelse(is.na(diff),0,ifelse(diff>365,1,0)))

d.ltfu<-all.dates.post.art[all.dates.post.art$flag==1,]
d.ltfu<-d.ltfu[!duplicated(d.ltfu$StudyId),c("StudyId","x.date")]
d.ltfu$ltfu.date<-d.ltfu$x.date
d.ltfu$x.date<-NULL

d<-merge(d,d.ltfu,by.x="id",by.y="StudyId",all.x=TRUE)


d$death.date<-as.Date(d$death.date)

stuff<-matrix(unlist(strsplit(as.character(d$init.date),'-')),ncol=3,byrow=TRUE)
year.haart<-stuff[,1]
d$year.haart<-year.haart

d$black<-ifelse(d$Race=="Black",1,0)
d$white<-ifelse(d$Race=="White",1,0)

d$t.death<-with(d, ifelse(death==1,death.date-init.date,last.date-init.date))
d$t.ade<-with(d, ifelse(ade==1,ade.date-init.date,t.death))
d$t.event<-with(d, pmin(t.ade,t.death))


#### Changed so that people who are LTFU are censored at the time of LTFU  (change made 2014-06-25)
####  This is the code you would use if you are incorporating a prospective LTFU definition.
# d$t.death<-with(d,ifelse(!is.na(ltfu.date),ltfu.date-init.date,
#                   ifelse(death==1,death.date-init.date,last.date-init.date)))
# d$t.ade<-with(d,ifelse(!is.na(ltfu.date) & !is.na(ade.date) & ltfu.date<ade.date, ltfu.date-init.date,
#                 ifelse(!is.na(ltfu.date) & is.na(ade.date), ltfu.date-init.date,
#                 ifelse(ade==1,ade.date-init.date,last.date-init.date))))
# d$t.event<-with(d, pmin(t.ade,t.death))
# 
# d$death1<-with(d,ifelse(!is.na(ltfu.date),0,death))
# d$ade1<-with(d,ifelse(!is.na(ltfu.date) & !is.na(ade.date) & ltfu.date<ade.date, 0, ade))
# d$event1<-with(d, ifelse(ade1==1,1,death1))

#### Reading in risk factors; I'm really only interested in IDU

rf<-read.csv("RiskFactor2.csv")
head(rf)
length(unique(rf$StudyId)) ### A single ID can have more than one row in this dataset

rf$idu<-with(rf,ifelse(Risk=="Injection drug use"|Risk=="Men who have sex with men and are an injection drug user",1,0))

idu<-NULL
for (i in 1:length(unique(rf$StudyId))) {
  idu[i]<-sum(rf$idu[rf$StudyId==unique(rf$StudyId)[i]])
}
idu<-ifelse(is.na(idu),0,idu)   


drf<-data.frame(id=unique(rf$StudyId),idu)

d1<-merge(d,drf,by="id",all.x=TRUE)

dc<-data.frame(id=d1$id,male=d1$male,black=d1$black,white=d1$white,idu=d1$idu,year.haart=d1$year.haart,cd4=d1$cd4,
               age.cd4=d1$age.cd4,age.haart=d1$age.haart,
               t.event=d1$t.event,event=d1$event,t.ade=d1$t.ade,ade=d1$ade,t.death=d1$t.death,death=d1$death,site=d1$Site)


#### Now I'm merging in Vandy data

# load("../old-vandy-analysis/vandy-cole-stuff.rda")
# dv<-v[,!names(v)%in%c("ltfu","vl")]
load("../old-vandy-analysis/vandy-cole-stuff-2014-06-25.rda")  
dv<-v
dv$site<-"VU"
exclude<-with(dv,ifelse(t.ade==0&ade==1,1,0))
dv<-dv[exclude==0,]


d<-rbind(dc,dv)

d<-d[!is.na(d$cd4),]    ####  They have to have a baseline CD4 to be included in the Cole analysis.  I'm excluding people without a baseline CD4 here.

###### Saving file to avoid re-running everything
save(d,file="cnics-cole-stuff4.Rda")


######### Doing several things to mimic what was done in Sterne analysis:

rm(list=ls())
setwd("/Volumes/encrypted/cnics/analyses-2013-Nov")
load("cnics-cole-stuff4.Rda")


####  Excluding IDU
d$idu<-ifelse(is.na(d$idu),0,d$idu)
d.n.idu<-d[d$idu==0,]

####  Excluding those with prior ADE
##  they've already been excluded from both datasets.  I can include them without too much trouble, but it will take re-
##  running some coded.

####  Add 50% of mean time between visits for each cohort to the patient's last visit
#####  Based on CD4 counts, this is approximately every 3-4 months for all sites, so we'll add 2 months to the last visit of those
#####  who don't have an event.  The code commented out below computed this.  I just assume it's similar for Vanderbilt and haven't
#####  evaluated.

# rm(list=ls())
# load("cnics-cole-dems-art-cd4.Rda")
# 
# d1<-d1[d1$cd4.date>=1995-01-01,]
# unique.id<-unique(d1$id)
# 
# time.btwn<-rep(NA,length(unique.id))
# for (i in 1:length(unique.id)){
#   if (sum(d1$id==unique.id[i])>1){
#     time.btwn[i]<-(max(d1$cd4.date[d1$id==unique.id[i]])-min(d1$cd4.date[d1$id==unique.id[i]]))/(sum(d1$id==unique.id[i])-1)
#   }
# }
# site<-NULL
# for (i in 1:length(unique.id)){
#   site[i]<-as.character(d1$Site[d1$id==unique.id[i]])[1]
# }
# table(site)
# summary(time.btwn)                   ### mean=131, median=112
# summary(time.btwn[site=="CWRU"])     ### mean=110, median=100
# summary(time.btwn[site=="FENWAY"])   ### mean=127, median=111
# summary(time.btwn[site=="JH"])       ### mean=128, median=107
# summary(time.btwn[site=="UAB"])      ### mean=127, median=113
# summary(time.btwn[site=="UCSD"])     ### mean=131, median=106
# summary(time.btwn[site=="UNC"])      ### mean=124, median=103
# summary(time.btwn[site=="UW"])       ### mean=144, median=124

d.add2mo<-d
d.add2mo$t.death<-with(d.add2mo, ifelse(death==0, t.death+60, t.death))
d.add2mo$t.event<-with(d.add2mo, ifelse(event==0, t.event+60, t.event))
d.add2mo$t.ade  <-with(d.add2mo, ifelse(ade==0&death==0, t.ade+60, t.ade))


####  Administratively censor patients 6 years after treatment initiation

d.trunc6yrs<-d
d.trunc6yrs$t.event<-with(d.trunc6yrs,ifelse(t.event>round(365.25*6),round(365.25*6),t.event))
d.trunc6yrs$event<-with(d.trunc6yrs,ifelse(t.event>round(365.25*6),0,event))
d.trunc6yrs$t.ade<-with(d.trunc6yrs,ifelse(t.ade>round(365.25*6),round(365.25*6),t.ade))
d.trunc6yrs$ade<-with(d.trunc6yrs,ifelse(t.ade>round(365.25*6),0,ade))
d.trunc6yrs$t.death<-with(d.trunc6yrs,ifelse(t.death>round(365.25*6),round(365.25*6),t.death))
d.trunc6yrs$death<-with(d.trunc6yrs,ifelse(t.death>round(365.25*6),0,death))


####  Exclude deaths within 2 weeks of initiation  --  This is only for the death analysis

d.exc.deaths2wks<-d[!(d$death==1&d$t.death<=14),]


####  Doing everything (excluding IDU, adding time to last visit, truncating at 6 years, and excluding deaths within 2 weeks 
####    for the mortality analysis.)


d1<-d[d$idu==0,]
d1$t.death<-with(d1, ifelse(death==0, t.death+60, t.death))
d1$t.event<-with(d1, ifelse(event==0, t.event+60, t.event))
d1$t.ade  <-with(d1, ifelse(ade==0&death==0, t.ade+60, t.ade))
d1$t.event<-with(d1,ifelse(t.event>round(365.25*6),round(365.25*6),t.event))
d1$event<-with(d1,ifelse(t.event>round(365.25*6),0,event))
d1$t.ade<-with(d1,ifelse(t.ade>round(365.25*6),round(365.25*6),t.ade))
d1$ade<-with(d1,ifelse(t.ade>round(365.25*6),0,ade))
d1$t.death<-with(d1,ifelse(t.death>round(365.25*6),round(365.25*6),t.death))
d1$death<-with(d1,ifelse(t.death>round(365.25*6),0,death))
d1$death<-with(d1,ifelse(death==1&t.death<=14,NA,death))
d1$t.death<-with(d1,ifelse(death==1&t.death<=14,NA,t.death))


######  All time should be on years scale for consistency with Sterne parameters
d$t.death<-d$t.death/365.25
d$t.event<-d$t.event/365.25
d$t.ade  <-d$t.ade  /365.25

d.add2mo$t.death<-d.add2mo$t.death/365.25
d.add2mo$t.event<-d.add2mo$t.event/365.25
d.add2mo$t.ade  <-d.add2mo$t.ade  /365.25

d.exc.deaths2wks$t.death<-d.exc.deaths2wks$t.death/365.25
d.exc.deaths2wks$t.event<-d.exc.deaths2wks$t.event/365.25
d.exc.deaths2wks$t.ade  <-d.exc.deaths2wks$t.ade  /365.25

d.n.idu$t.death<-d.n.idu$t.death/365.25
d.n.idu$t.event<-d.n.idu$t.event/365.25
d.n.idu$t.ade  <-d.n.idu$t.ade  /365.25

d.trunc6yrs$t.death<-d.trunc6yrs$t.death/365.25
d.trunc6yrs$t.event<-d.trunc6yrs$t.event/365.25
d.trunc6yrs$t.ade  <-d.trunc6yrs$t.ade  /365.25

d1$t.death<-d1$t.death/365.25
d1$t.event<-d1$t.event/365.25
d1$t.ade  <-d1$t.ade  /365.25


###### Saving files to avoid re-running everything
save(d,d.add2mo,d.exc.deaths2wks,d.n.idu,d.trunc6yrs,d1,file="cnics-cole-stuff5.Rda")


##### adding by Qi for censoring >1 year visit gap
######## PULL info about visits date for cnics dataset
rm(list=ls())
#### "cnics-cole-stuff3.Rda" has dates info for lab and medication
load("cnics-cole-stuff3.Rda")
med<-read.csv("Medication2.csv")
lab<-read.csv("Lab2.csv")
mort<-read.csv("Mortality2.csv")
dia<-read.csv("Diagnosis2.csv")
dems<-read.csv("Demographic2.csv")

#### get dates from diagnosis and mortality 
mort.dates<-mort[,c("StudyId","DeathDate")]
mort.dates$x.date<-mort.dates$DeathDate
mort.dates$x.date.approx<-"d"
mort.dates$DeathDate<-NULL

dia.dates<-dia[,c("StudyId","DiagnosisDate")]
dia.dates$x.date<-dia.dates$DiagnosisDate
dia.dates$x.date.approx<-"d"
dia.dates$DiagnosisDate<-NULL
colnames(lab.dates) <- colnames(dia.dates)
colnames(start.dates) <- colnames(dia.dates)
colnames(end.dates) <- colnames(dia.dates)

##### visit dates include any dates info from lab, med, mort, and dia
visit.dates <- rbind(lab.dates[!duplicated(lab.dates),], 
                     dia.dates[!duplicated(dia.dates),],
                     mort.dates[!duplicated(mort.dates),],
                     end.dates[!duplicated(end.dates),],
                     start.dates[!duplicated(start.dates),])

visit.dates <- visit.dates[order(visit.dates$StudyId, visit.dates$x.date),]
visit.dates <- visit.dates[!duplicated(visit.dates),]
colnames(visit.dates) <- c("id", "date", "date.approx")

visit.date.2 <- merge(visit.dates, d[, c("id", "dob")], by="id")
visit.date.2 <- visit.date.2[order(visit.date.2$id, visit.date.2$date), ]
visit.date.2$visit.age <- as.numeric(visit.date.2$date - visit.date.2$dob)/365.25
visit.date.2 <- subset(visit.date.2, visit.age>0)
visit.date.2 <- visit.date.2[, c("id", "visit.age")]
visit.date.2 <- visit.date.2[!duplicated(visit.date.2),]
visit.date.2$last.visit.age <- visit.date.2$visit.age 
visit.date.2[duplicated(visit.date.2$id), "last.visit.age"] <- visit.date.2[which(duplicated(visit.date.2$id))-1, "visit.age"]

visit.date.2[!duplicated(visit.date.2$id), "last.visit.age"] <- NA
visit.date.2$gap <- visit.date.2$visit.age -visit.date.2$last.visit.age
visit.date.2$ltfu.gap <- visit.date.2$gap >1

visit.gap.cnics <- subset(visit.date.2, ltfu.gap==1)
colnames(visit.gap.cnics) <- c("id", "next.visit", "ltfu.age", "gap", "ltfu.gap")
ltfu.gap.cnics <- visit.gap.cnics[, c("id", "ltfu.age", "next.visit", "gap")]

save(ltfu.gap.cnics, file="ltfu_gap_cnics.Rda")
rm(list=ls())

load("ltfu_gap_cnics.Rda")

##### pull visit info for vandy data
library(foreign)

cl<-read.dta("../old-vandy-analysis/when_to_start_19nov2009.dta")
nade<-read.csv("../old-vandy-analysis/w2s_non_ades_dx_18nov2009.csv")
tx<-read.csv("../old-vandy-analysis/when_to_start_art_18nov2009.csv")
oi<-read.csv("../old-vandy-analysis/when_to_start_year1_oi_18nov2009.csv")

#### I grap any date info without any data cleaning
cl.age <- cl[, c("cfar_pid", "age")]
colnames(cl.age) <- c("id", "visit.age")
nade.age <- nade[, c("CFAR_PID", "AGE_AT_NON_ADE_DIAGNOSIS_ONSET")]
colnames(nade.age) <- c("id", "visit.age")
tx.start.age <-  tx[, c("CFAR_PID", "AGE_AT_RX_START")]
colnames(tx.start.age) <- c("id", "visit.age")
tx.end.age <- tx[, c("CFAR_PID", "AGE_AT_RX_END")]
colnames(tx.end.age) <- c("id", "visit.age")
oi.age <- oi[, c("CFAR_PID", "AGE_AT_OI")]
colnames(oi.age) <- c("id", "visit.age")

visit.age <- rbind(cl.age, nade.age, tx.start.age, tx.end.age, oi.age)
visit.age <- subset(visit.age, !is.na(visit.age))
visit.age <- visit.age[!duplicated(visit.age),]
visit.age <- visit.age[order(visit.age$id, visit.age$visit.age),]

visit.age$last.visit.age <- visit.age$visit.age 
visit.age[duplicated(visit.age$id), "last.visit.age"] <- visit.age[which(duplicated(visit.age$id))-1, "visit.age"]

visit.age[!duplicated(visit.age$id), "last.visit.age"] <- NA
visit.age$gap <- visit.age$visit.age -visit.age$last.visit.age
visit.age$ltfu.gap <- visit.age$gap >1

visit.gap.vandy <- subset(visit.age, ltfu.gap==1)
colnames(visit.gap.vandy) <- c("id", "next.visit", "ltfu.age", "gap", "ltfu.gap")
ltfu.gap.vandy <- visit.gap.vandy[, c("id", "ltfu.age", "next.visit", "gap")]


ltfu.gap.all <- rbind(ltfu.gap.cnics, ltfu.gap.vandy)
save(ltfu.gap.cnics, ltfu.gap.vandy, ltfu.gap.all, file="ltfu_gap.Rda")


rm(list=ls())
load("ltfu_gap.Rda")
load("cnics-cole-stuff5.Rda")
d.ltfu.gap <- merge(d, ltfu.gap.all, by="id", all.x=TRUE)
tmp <- subset(d.ltfu.gap,  (ltfu.age>=age.haart))
#### only need the first >1 year gap for each patient
tmp<- tmp[order(tmp$id, tmp$ltfu.age), ]
tmp <- tmp[!duplicated(tmp$id), ]
tmp$t.ltfu <- tmp$ltfu.age - tmp$age.haart
#### 77 patients: start haart, but only come back more than 1 year after trt initiation
d.gap1year.ltfu <- merge(d, tmp[, c("id", "t.ltfu")], by="id", all.x=TRUE)
#### 1660 of 9167 is censored due to >1 year gap after trt initiation
#tmp2 <- subset(d.gap1year.ltfu, !is.na(t.ltfu))
#d.gap1year.ltfu$death1 <- ifelse(d.gap1year.ltfu$t.ltfu<d.gap1year.ltfu$t.death, 0, d.gap1year.ltfu$death)
d.gap1year.ltfu$death1 <- with(d.gap1year.ltfu, ifelse(is.na(t.ltfu),death, 
                                                       ifelse(round(t.ltfu,4)<round(t.death,4), 0, death)))
d.gap1year.ltfu$t.death.1<- with(d.gap1year.ltfu, ifelse(is.na(t.ltfu), t.death, pmin(t.death, t.ltfu)))

d.gap1year.ltfu$ade1 <- with(d.gap1year.ltfu, ifelse(is.na(t.ltfu), ade, 
                                                     ifelse(round(t.ltfu,4)<round(t.ade,4), 0, ade)))

d.gap1year.ltfu$t.ade.1 <- with(d.gap1year.ltfu, ifelse(is.na(t.ltfu), t.ade, pmin(t.ade, t.ltfu)))
d.gap1year.ltfu$t.event.1 <- with(d.gap1year.ltfu, pmin(t.death.1, t.ade.1))
d.gap1year.ltfu$event1 <- with(d.gap1year.ltfu, ifelse(ade1==1, 1, death1))

with(d.gap1year.ltfu, sum(death1!=death, na.rm=TRUE))
dim(subset(d.gap1year.ltfu, (round(t.death.1,4)!=round(t.death,4))))

with(d.gap1year.ltfu, sum(ade1!=ade, na.rm=TRUE))
dim(subset(d.gap1year.ltfu, (round(t.ade.1,4)!=round(t.ade,4))))

with(d.gap1year.ltfu, sum(event1!=event, na.rm=TRUE))
dim(subset(d.gap1year.ltfu, (round(t.event.1,4)!=round(t.event,4))))

d.gap1year.ltfu$death <- d.gap1year.ltfu$death1
d.gap1year.ltfu$t.death <- d.gap1year.ltfu$t.death.1
d.gap1year.ltfu$ade <- d.gap1year.ltfu$ade1
d.gap1year.ltfu$t.ade <- d.gap1year.ltfu$t.ade.1
d.gap1year.ltfu$event <- d.gap1year.ltfu$event.1
d.gap1year.ltfu$t.event <- d.gap1year.ltfu$t.event.1

d.gap1year.ltfu <- subset(d.gap1year.ltfu, t.event>0)
d.gap1year.ltfu$t.event.1 <- d.gap1year.ltfu$event1 <- d.gap1year.ltfu$t.ade.1 <- d.gap1year.ltfu$ade1 <- d.gap1year.ltfu$death1 <- d.gap1year.ltfu$t.death.1 <- d.gap1year.ltfu$t.ltfu <- NULL
dim(d.gap1year.ltfu)
###### Saving files to avoid re-running everything
save(d, d.add2mo, d.exc.deaths2wks, d.n.idu, d.trunc6yrs, d1, d.gap1year.ltfu, file="cnics-cole-stuff6.Rda")


#save(d.gap1year.ltfu,file="junk.Rda")


######  Now let's do the analyses

rm(list=ls())
setwd("/Volumes/encrypted/cnics/analyses-2013-Nov")
load("cnics-cole-stuff6.Rda")


library(survival)

####  Plots of original data, using time from HAART initiation by CD4 strata.  This ignores lead time bias.

cd4.cat<-with(d,ifelse(cd4<101,"0-100",
                       ifelse(cd4<201,"101-200",
                              ifelse(cd4<301,"201-300",
                                     ifelse(cd4<401,"301-400",
                                            ifelse(cd4<501,"401-500",
                                                   ifelse(cd4<601,"501-600",
                                                          ">600")))))))
death<-d$death
t.death<-d$t.death

S.0<-Surv(t.death[cd4.cat=="0-100"],death[cd4.cat=="0-100"])
S.1<-Surv(t.death[cd4.cat=="101-200"],death[cd4.cat=="101-200"])
S.2<-Surv(t.death[cd4.cat=="201-300"],death[cd4.cat=="201-300"])
S.3<-Surv(t.death[cd4.cat=="301-400"],death[cd4.cat=="301-400"])
S.4<-Surv(t.death[cd4.cat=="401-500"],death[cd4.cat=="401-500"])
S.5<-Surv(t.death[cd4.cat=="501-600"],death[cd4.cat=="501-600"])
S.6<-Surv(t.death[cd4.cat==">600"],death[cd4.cat==">600"])

pdf("figure1-combined.pdf", height=8, width=8)  #Combining these into a single figure
par(mfrow=c(2,2))
#pdf("plot-of-survival.pdf",height=6,width=6)
par(mar=c(5, 4, 2, 2))
plot(survfit(Surv(t.death,death)~cd4.cat),xlim=c(0,10),ylim=c(0,.6), conf.int=FALSE, mark.time=FALSE, xlab="Years from Start of ART",
     ylab="Probability of Death", axes=FALSE,fun="event")
axis(2)
axis(1,at=c(0:10),label=c(0:10))
lines(survfit(S.0~1), mark.time=FALSE, col=2, fun="event")
lines(survfit(S.1~1), mark.time=FALSE, col=3, fun="event")
lines(survfit(S.2~1), mark.time=FALSE, col=4, fun="event")
lines(survfit(S.3~1), mark.time=FALSE, col=5, fun="event")
lines(survfit(S.4~1), mark.time=FALSE, col=6, fun="event")
lines(survfit(S.5~1), mark.time=FALSE, col=7, fun="event")
lines(survfit(S.6~1), mark.time=FALSE, col=8, fun="event")

legend("topleft",c("0-100","101-200","201-300","301-400","401-500","501-600",">600"),,col=c(2:8),lty=rep(1,7),bty="n")
#dev.off()


cd4<-d$cd4

library("rms")
mod<-coxph(Surv(t.death,death)~rcs(cd4,7))
stuff<-predict(mod,se.fit=TRUE)
#pdf("death-by-cd4.pdf",height=6, width=6)
#par(mar=c(5, 4, 2, 2))
plot(c(cd4[!is.na(cd4)],cd4[!is.na(cd4)],cd4[!is.na(cd4)]),
     c(stuff$fit+1.96*stuff$se.fit,stuff$fit-1.96*stuff$se.fit,stuff$fit),
     xlim=c(0,800),ylim=c(-1,1.5),type="n",xlab="CD4 at ART Initiation",ylab="Log-Hazard of Death",bty="n")
ord<-order(cd4[!is.na(cd4)])
lines(cd4[!is.na(cd4)][ord],stuff$fit[ord])
lines(cd4[!is.na(cd4)][ord],(stuff$fit-1.96*stuff$se.fit)[ord],col=gray(.5))
lines(cd4[!is.na(cd4)][ord],(stuff$fit+1.96*stuff$se.fit)[ord],col=gray(.5))
#dev.off()


event<-d$event
t.event<-d$t.event


S.0<-Surv(t.event[cd4.cat=="0-100"],event[cd4.cat=="0-100"])
S.1<-Surv(t.event[cd4.cat=="101-200"],event[cd4.cat=="101-200"])
S.2<-Surv(t.event[cd4.cat=="201-300"],event[cd4.cat=="201-300"])
S.3<-Surv(t.event[cd4.cat=="301-400"],event[cd4.cat=="301-400"])
S.4<-Surv(t.event[cd4.cat=="401-500"],event[cd4.cat=="401-500"])
S.5<-Surv(t.event[cd4.cat=="501-600"],event[cd4.cat=="501-600"])
S.6<-Surv(t.event[cd4.cat==">600"],event[cd4.cat==">600"])


#pdf("plot-of-ade-free-survival.pdf",height=6,width=6)
#par(mar=c(5, 4, 2, 2))
plot(survfit(Surv(t.event,event)~cd4.cat),xlim=c(0,10),ylim=c(0,.6), conf.int=FALSE, mark.time=FALSE, xlab="Years from Start of ART",
     ylab="Probability of AIDS or Death", axes=FALSE,fun="event")
axis(2)
axis(1,at=c(0:10),label=c(0:10))
lines(survfit(S.0~1), mark.time=FALSE, col=2, fun="event")
lines(survfit(S.1~1), mark.time=FALSE, col=3, fun="event")
lines(survfit(S.2~1), mark.time=FALSE, col=4, fun="event")
lines(survfit(S.3~1), mark.time=FALSE, col=5, fun="event")
lines(survfit(S.4~1), mark.time=FALSE, col=6, fun="event")
lines(survfit(S.5~1), mark.time=FALSE, col=7, fun="event")
lines(survfit(S.6~1), mark.time=FALSE, col=8, fun="event")

legend("topleft",c("0-100","101-200","201-300","301-400","401-500","501-600",">600"),col=c(2:8),lty=rep(1,7),bty="n")
#dev.off()


mod1<-coxph(Surv(t.event,event)~rcs(cd4,7))
stuff<-predict(mod1,se.fit=TRUE)
#pdf("aids-death-by-cd4.pdf", height=6, width=6)
plot(c(cd4[!is.na(cd4)],cd4[!is.na(cd4)],cd4[!is.na(cd4)]),
     c(stuff$fit+1.96*stuff$se.fit,stuff$fit-1.96*stuff$se.fit,stuff$fit),
     xlim=c(0,800),ylim=c(-1,1.5),type="n",xlab="CD4 at ART Initiation",ylab="Log-Hazard of AIDS or Death",bty="n")
ord<-order(cd4[!is.na(cd4)])
lines(cd4[!is.na(cd4)][ord],stuff$fit[ord])
lines(cd4[!is.na(cd4)][ord],(stuff$fit-1.96*stuff$se.fit)[ord],col=gray(.5))
lines(cd4[!is.na(cd4)][ord],(stuff$fit+1.96*stuff$se.fit)[ord],col=gray(.5))
dev.off()


##########################  Now I'm going to account for lead times using the approach of Cole and parameter values from Sterne et al.


rm(list=ls())
setwd("/Volumes/encrypted/cnics/analyses-2013-Nov")
load("cnics-cole-stuff6.Rda")

library(survival)

##########  Simulating generalized gamma data

qggamma <- function(beta, sigma, lambda, p) {
  
  s<-lambda^(-2)
  r<-s*exp((-beta*lambda)/sigma)
  b<-lambda/sigma
  quan<-(lambda>0)*((qgamma(p, shape=s, rate=r))^(1/b))
  quan<- quan+(lambda<0)*((qgamma(1-p,shape=s, rate=r))^(1/b))
  quan
  
}


###### Parameters received from Sterne, May, and Cole

par.ests.d<-  matrix(c(-0.01976,1.4140,0.8073,-0.3376,0.9424,0.9799,0.004661,450,
                       0.1292,1.4136,0.8473,0.1036,1.2702,0.8694,0.006369,425,
                       0.1378,1.4581,0.7093,-0.1678,1.2230,0.7157,0.006523,400,
                       0.1985,1.4208,0.7989,-0.1855,1.2914,0.7565,0.006381,375,
                       0.2142,1.3600,0.8139,0.3155,1.4237,0.6343,0.007307,350,
                       0.2717,1.2890,0.9180,0.005815,0.9229,0.6943,0.007987,325,
                       0.2535,1.2752,0.8687,-0.2549,1.2515,-0.09287,0.01010,300,
                       0.3532,1.2246,1.0406,-0.05621,1.2546,0.4186,0.01115,275,
                       0.3387,1.2135,0.9881,-0.1256,1.6811,-0.08963,0.01367,250,
                       0.3131,1.2104,0.9577,-0.2491,1.7802,-0.08660,0.01483,225,
                       0.3287,1.1857,0.9952,0.1319,1.4342,0.7271,0.01549,200,
                       0.2843,1.1348,0.9281,0.4699,0.7067,1.9816,0.01810,175,
                       0.3027,1.0971,0.9425,0.4677,0.6664,1.8550,0.02228,150,
                       0.2842,1.0680,0.9363,0.4622,0.7501,1.6866,0.02700,125,
                       0.2362,1.0575,0.8927,0.2233,1.1631,0.7395,0.03686,100),ncol=8,byrow=TRUE)


######                 mu1,   sig1,   del1,  mu2,  sig2,  del2,   pi,   threshold    
par.ests.e<-matrix(c(-0.03244,1.4193,0.7969,0.4088,1.0706,1.2097,0.01610,450,
                     0.1132,1.4260,0.8284,0.5776,0.9573,1.4753,0.02273,425,
                     0.1211,1.4686,0.6908,0.5509,1.0282,1.3532,0.02691,400,
                     0.1938,1.4293,0.7916,0.1801,1.1994,0.8842,0.02830,375,
                     0.2147,1.3643,0.8099,0.2086,2.3632,0.8688,0.03197,350,
                     0.2830,1.2848,0.9275,0.002276,1.1950,0.8346,0.03249,325,
                     0.2590,1.2710,0.8762,0.07611,1.3065,0.6431,0.03828,300,
                     0.3509,1.2137,1.0532,0.3388,1.4096,0.7650,0.05289,275,
                     0.3211,1.2222,0.9638,0.5177,1.2067,1.1048,0.06128,250,
                     0.2951,1.2273,0.9177,0.4338,1.1252,1.2341,0.06974,225,
                     0.3231,1.1839,0.9771,0.3187,1.2826,1.0666,0.08119,200,
                     0.2906,1.1298,0.9100,0.2165,1.1235,1.1676,0.09472,175,
                     0.3235,1.0885,0.9236,0.1419,1.0550,1.2139,0.1079,150,
                     0.2989,1.0653,0.8941,0.1774,1.0114,1.2725,0.1309,125,
                     0.2417,1.0461,0.8189,0.1073,1.1676,1.0102,0.1759,100),ncol=8,byrow=TRUE)


#basic.naive.d.400<-with(d,hr.compute(lower=300,upper=500,threshold.cd4=400,cd4=cd4,t.event=t.death,event=death))

#####  This function performs the analysis of Cole et al.

hr.compute.lt<-function(lower,upper,threshold.cd4,n.impute,cd4,t.event,event,par.ests,truncate=NA,discrete=FALSE,cd4.3=FALSE,age.haart=NA,age.cd4=NA) {
  
  select1<-ifelse(cd4>lower & cd4<=upper,1,0)
  select<-ifelse(is.na(select1),0,select1)
  if (cd4.3){
    select<-ifelse(is.na(age.cd4),select,
                   ifelse(age.haart-age.cd4>0.25,0,select))
  }
  
  t<-t.event[select==1]
  event.orig<-event[select==1]
  defer.orig<-ifelse(cd4[select==1]<=threshold.cd4,1,0)
  row<-which(par.ests[,8]==threshold.cd4)
  
  pi.est<-par.ests[row,7]                       
  n<-sum(defer.orig)
  
  beta<-se<-NULL
  for (i in 1:n.impute) {
    
    events<-event.orig
    defer<-defer.orig
    t.f<-ifelse(defer==0,0,qggamma(par.ests[row,1],par.ests[row,2],par.ests[row,3],runif(n,0,1)))
    t.star<-t+t.f
    
#   u<-rbinom(1,n,pi.est/(1-pi.est))  ### Changed 2016-01-23 to be more accurate, although essentially same
    u<-rnbinom(1,n,1-pi.est)
    if (u>0) {
      t.g<-qggamma(par.ests[row,4],par.ests[row,5],par.ests[row,6],runif(u,0,1))
      t.star<-c(t.star,t.g)
      defer<-c(defer,rep(1,u))
      events<-c(events,rep(1,u))
    }
    
    if (!is.na(truncate)) {
      t.star<-ifelse(t.star>truncate,truncate,t.star)
      events<-ifelse(t.star>truncate,0,events)
    }
    
    S<-Surv(t.star,events)
    mod<-coxph(S~defer)
    beta[i]<-summary(mod)$coeff[1]
    se[i]<-summary(mod)$coeff[3]
    
    if (discrete) {
      t.star0<-t.star[t.star>0]
      defer0<-defer[t.star>0]
      events0<-events[t.star>0]
      t.star.months<-ceiling(t.star0*12)
      t.months<-1:t.star.months[1]
      defer.m<-rep(defer0[1],t.star.months[1])
      events.m<-rep(0,t.star.months[1])
      events.m[t.star.months[1]]<-events0[1]
      for (j in 2:length(t.star0)) {
        t.months<-c(1:t.star.months[j],t.months)
        defer.m<-c(rep(defer0[j],t.star.months[j]),defer.m)
        stuff<-rep(0,t.star.months[j])
        stuff[t.star.months[j]]<-events0[j]
        events.m<-c(stuff,events.m)
      }
      mod<-glm(events.m~rcs(t.months,4) + defer.m, family="binomial")  
      beta[i]<-summary(mod)$coeff["defer.m",1]
      se[i]<-summary(mod)$coeff["defer.m",2]
    }
    
    
  }
  
  hr<-exp(mean(beta))
  var<-mean(se^2)+var(beta)*(1+1/n.impute)
  ci<-exp(mean(beta)+1.96*sqrt(var)*c(-1,1))
  label<-paste(paste(lower,threshold.cd4-1,sep="-"),paste(threshold.cd4,upper-1,sep="-"),sep=" vs. ")
  return(c(label,hr,ci))
}


####  Naive for quick comparison
# hr.compute<-function(lower,upper,threshold.cd4,cd4,t.event,event) {
#   select1<-ifelse(cd4>lower & cd4<= upper,1,0)
#   select<-ifelse(is.na(select1),0,select1)
#   S<-Surv(t.event[select==1],event[select==1])
#   mod<-coxph(S~cd4[select==1]<=threshold.cd4)
#   hr<-summary(mod)$coeff[1,2]
#   ci<-exp(summary(mod)$coeff[1,1]+1.96*summary(mod)$coeff[1,3]*c(-1,1))
#   label<-paste(paste(lower,threshold.cd4-1,sep="-"),paste(threshold.cd4,upper-1,sep="-"),sep=" vs. ")
#   return(c(label,hr,ci))
# }
# 
# lower<-0
# upper<-200
# threshold<-100


####  Primary ADE/death analyses
set.seed(20130620)
basic.100<-with(d,hr.compute.lt(lower=  -1,upper=200,threshold.cd4=100,n.impute=20,cd4=cd4,t.event=t.event,event=event,par.ests=par.ests.e)) 
basic.200<-with(d,hr.compute.lt(lower=100,upper=300,threshold.cd4=200,n.impute=20,cd4=cd4,t.event=t.event,event=event,par.ests=par.ests.e))
basic.300<-with(d,hr.compute.lt(lower=200,upper=400,threshold.cd4=300,n.impute=20,cd4=cd4,t.event=t.event,event=event,par.ests=par.ests.e)) 
basic.400<-with(d,hr.compute.lt(lower=300,upper=500,threshold.cd4=400,n.impute=20,cd4=cd4,t.event=t.event,event=event,par.ests=par.ests.e)) 
basic<-rbind(basic.100,basic.200,basic.300,basic.400)

set.seed(20130620)
sterne.100<-with(d1,hr.compute.lt(lower=-1,upper=200,threshold.cd4=100,n.impute=20,cd4=cd4,t.event=t.event,event=event,par.ests=par.ests.e,truncate=6,cd4.3=TRUE,age.haart=age.haart,age.cd4=age.cd4)) 
sterne.200<-with(d1,hr.compute.lt(lower=100,upper=300,threshold.cd4=200,n.impute=20,cd4=cd4,t.event=t.event,event=event,par.ests=par.ests.e,truncate=6,cd4.3=TRUE,age.haart=age.haart,age.cd4=age.cd4))
sterne.300<-with(d1,hr.compute.lt(lower=200,upper=400,threshold.cd4=300,n.impute=20,cd4=cd4,t.event=t.event,event=event,par.ests=par.ests.e,truncate=6,cd4.3=TRUE,age.haart=age.haart,age.cd4=age.cd4)) 
sterne.400<-with(d1,hr.compute.lt(lower=300,upper=500,threshold.cd4=400,n.impute=20,cd4=cd4,t.event=t.event,event=event,par.ests=par.ests.e,truncate=6,cd4.3=TRUE,age.haart=age.haart,age.cd4=age.cd4)) 
sterne<-rbind(sterne.100,sterne.200,sterne.300,sterne.400)


# ####  Naive, for comparison
# basic.naive.100<-with(d,hr.compute(lower=  0,upper=200,threshold.cd4=100,cd4=cd4,t.event=t.event,event=event))
# basic.naive.200<-with(d,hr.compute(lower=100,upper=300,threshold.cd4=200,cd4=cd4,t.event=t.event,event=event))
# basic.naive.300<-with(d,hr.compute(lower=200,upper=400,threshold.cd4=300,cd4=cd4,t.event=t.event,event=event))
# basic.naive.400<-with(d,hr.compute(lower=300,upper=500,threshold.cd4=400,cd4=cd4,t.event=t.event,event=event))
# basic.naive<-rbind(basic.naive.100,basic.naive.200,basic.naive.300,basic.naive.400)
# 
# sterne.naive.100<-with(d1,hr.compute(lower=  0,upper=200,threshold.cd4=100,cd4=cd4,t.event=t.event,event=event))
# sterne.naive.200<-with(d1,hr.compute(lower=100,upper=300,threshold.cd4=200,cd4=cd4,t.event=t.event,event=event))
# sterne.naive.300<-with(d1,hr.compute(lower=200,upper=400,threshold.cd4=300,cd4=cd4,t.event=t.event,event=event))
# sterne.naive.400<-with(d1,hr.compute(lower=300,upper=500,threshold.cd4=400,cd4=cd4,t.event=t.event,event=event))
# sterne.naive<-rbind(sterne.naive.100,sterne.naive.200,sterne.naive.300,sterne.naive.400)


basic
sterne

# basic.naive
# sterne.naive


pdf("plot-sterne-ade-death.pdf")
par(mar=c(5,.1,.5,.1))
plot(c(log(.25),log(3)),c(0,.9),xlab="Hazard Ratio",ylab="",axes=FALSE,type="n")
abline(v=0,col=gray(.5))
axis(1,at=log(c(.33,.5,1,2,3)),c(.3,.5,1,2,3))
points(log(as.numeric(sterne[1,2])),.8,pch=3)
lines(log(as.numeric(sterne[1,3:4])),c(.8,.8))
points(log(as.numeric(sterne[2,2])),.6,pch=3)
lines(log(as.numeric(sterne[2,3:4])),c(.6,.6))
points(log(as.numeric(sterne[3,2])),.4,pch=3)
lines(log(as.numeric(sterne[3,3:4])),c(.4,.4))
points(log(as.numeric(sterne[4,2])),.2,pch=3)
lines(log(as.numeric(sterne[4,3:4])),c(.2,.2))
text(log(.25),.8,"0-100 vs. 101-200", pos=4,cex=1)
text(log(.25),.6,"101-200 vs. 201-300", pos=4,cex=1)
text(log(.25),.4,"201-300 vs. 301-400", pos=4,cex=1)
text(log(.25),.2,"301-400 vs. 401-500", pos=4,cex=1)
dev.off()


####  Primary Death Analyses
set.seed(20130620)
basic.d.100<-with(d,hr.compute.lt(lower=  -1,upper=200,threshold.cd4=100,n.impute=20,cd4=cd4,t.event=t.death,event=death,par.ests=par.ests.d)) 
basic.d.200<-with(d,hr.compute.lt(lower=100,upper=300,threshold.cd4=200,n.impute=20,cd4=cd4,t.event=t.death,event=death,par.ests=par.ests.d) )
basic.d.300<-with(d,hr.compute.lt(lower=200,upper=400,threshold.cd4=300,n.impute=20,cd4=cd4,t.event=t.death,event=death,par.ests=par.ests.d) )
basic.d.400<-with(d,hr.compute.lt(lower=300,upper=500,threshold.cd4=400,n.impute=20,cd4=cd4,t.event=t.death,event=death,par.ests=par.ests.d) )
basic.d<-rbind(basic.d.100,basic.d.200,basic.d.300,basic.d.400)

set.seed(20130620)
sterne.d.100<-with(d1,hr.compute.lt(lower=  -1,upper=200,threshold.cd4=100,n.impute=20,cd4=cd4,t.event=t.death,event=death,par.ests=par.ests.d,truncate=6,cd4.3=TRUE,age.haart=age.haart,age.cd4=age.cd4)) 
sterne.d.200<-with(d1,hr.compute.lt(lower=100,upper=300,threshold.cd4=200,n.impute=20,cd4=cd4,t.event=t.death,event=death,par.ests=par.ests.d,truncate=6,cd4.3=TRUE,age.haart=age.haart,age.cd4=age.cd4))
sterne.d.300<-with(d1,hr.compute.lt(lower=200,upper=400,threshold.cd4=300,n.impute=20,cd4=cd4,t.event=t.death,event=death,par.ests=par.ests.d,truncate=6,cd4.3=TRUE,age.haart=age.haart,age.cd4=age.cd4))
sterne.d.400<-with(d1,hr.compute.lt(lower=300,upper=500,threshold.cd4=400,n.impute=20,cd4=cd4,t.event=t.death,event=death,par.ests=par.ests.d,truncate=6,cd4.3=TRUE,age.haart=age.haart,age.cd4=age.cd4))
sterne.d<-rbind(sterne.d.100,sterne.d.200,sterne.d.300,sterne.d.400)

basic.d
sterne.d


####  Naive, for comparison
# basic.naive.d.100<-with(d,hr.compute(lower=  -1,upper=200,threshold.cd4=100,cd4=cd4,t.event=t.death,event=death))
# basic.naive.d.200<-with(d,hr.compute(lower=100,upper=300,threshold.cd4=200,cd4=cd4,t.event=t.death,event=death))
# basic.naive.d.300<-with(d,hr.compute(lower=200,upper=400,threshold.cd4=300,cd4=cd4,t.event=t.death,event=death))
# basic.naive.d.400<-with(d,hr.compute(lower=300,upper=500,threshold.cd4=400,cd4=cd4,t.event=t.death,event=death))
# basic.naive.d<-rbind(basic.naive.d.100,basic.naive.d.200,basic.naive.d.300,basic.naive.d.400)
# 
# sterne.naive.d.100<-with(d1,hr.compute(lower=  -1,upper=200,threshold.cd4=100,cd4=cd4,t.event=t.death,event=death))
# sterne.naive.d.200<-with(d1,hr.compute(lower=100,upper=300,threshold.cd4=200,cd4=cd4,t.event=t.death,event=death))
# sterne.naive.d.300<-with(d1,hr.compute(lower=200,upper=400,threshold.cd4=300,cd4=cd4,t.event=t.death,event=death))
# sterne.naive.d.400<-with(d1,hr.compute(lower=300,upper=500,threshold.cd4=400,cd4=cd4,t.event=t.death,event=death))
# sterne.naive.d<-rbind(sterne.naive.d.100,sterne.naive.d.200,sterne.naive.d.300,sterne.naive.d.400)
# 
# 
# basic.naive.d
# sterne.naive.d


pdf("plot-sterne-death.pdf")
par(mar=c(5,.1,.5,.1))
plot(c(log(.25),log(3)),c(0,.9),xlab="Hazard Ratio",ylab="",axes=FALSE,type="n")
abline(v=0,col=gray(.5))
axis(1,at=log(c(.33,.5,1,2,3)),c(.3,.5,1,2,3))
points(log(as.numeric(sterne.d[1,2])),.8,pch=3)
lines(log(as.numeric(sterne.d[1,3:4])),c(.8,.8))
points(log(as.numeric(sterne.d[2,2])),.6,pch=3)
lines(log(as.numeric(sterne.d[2,3:4])),c(.6,.6))
points(log(as.numeric(sterne.d[3,2])),.4,pch=3)
lines(log(as.numeric(sterne.d[3,3:4])),c(.4,.4))
points(log(as.numeric(sterne.d[4,2])),.2,pch=3)
lines(log(as.numeric(sterne.d[4,3:4])),c(.2,.2))
text(log(.25),.8,"0-100 vs. 101-200", pos=4,cex=1)
text(log(.25),.6,"101-200 vs. 201-300", pos=4,cex=1)
text(log(.25),.4,"201-300 vs. 301-400", pos=4,cex=1)
text(log(.25),.2,"301-400 vs. 401-500", pos=4,cex=1)
dev.off()


pdf("plot-sterne-combined.pdf",height=8,width=6)
par(mar=c(5,.1,.5,.1))
plot(c(log(.25),log(3)),c(0,1),xlab="Hazard Ratio",ylab="",axes=FALSE,type="n")
abline(v=0,col=gray(.5))
axis(1,at=log(c(.33,.5,1,2,3)),c(.3,.5,1,2,3))
points(log(as.numeric(sterne.d[1,2])),1,pch=3)
lines(log(as.numeric(sterne.d[1,3:4])),c(1,1))
points(log(as.numeric(sterne.d[2,2])),.9,pch=3)
lines(log(as.numeric(sterne.d[2,3:4])),c(.9,.9))
points(log(as.numeric(sterne.d[3,2])),.8,pch=3)
lines(log(as.numeric(sterne.d[3,3:4])),c(.8,.8))
points(log(as.numeric(sterne.d[4,2])),.7,pch=3)
lines(log(as.numeric(sterne.d[4,3:4])),c(.7,.7))
text(log(.25),1,"0-100 vs. 101-200", pos=4,cex=1)
text(log(.25),.9,"101-200 vs. 201-300", pos=4,cex=1)
text(log(.25),.8,"201-300 vs. 301-400", pos=4,cex=1)
text(log(.25),.7,"301-400 vs. 401-500", pos=4,cex=1)
points(log(as.numeric(sterne[1,2])),.4,pch=3)
lines(log(as.numeric(sterne[1,3:4])),c(.4,.4))
points(log(as.numeric(sterne[2,2])),.3,pch=3)
lines(log(as.numeric(sterne[2,3:4])),c(.3,.3))
points(log(as.numeric(sterne[3,2])),.2,pch=3)
lines(log(as.numeric(sterne[3,3:4])),c(.2,.2))
points(log(as.numeric(sterne[4,2])),.1,pch=3)
lines(log(as.numeric(sterne[4,3:4])),c(.1,.1))
text(log(.25),.4,"0-100 vs. 101-200", pos=4,cex=1)
text(log(.25),.3,"101-200 vs. 201-300", pos=4,cex=1)
text(log(.25),.2,"201-300 vs. 301-400", pos=4,cex=1)
text(log(.25),.1,"301-400 vs. 401-500", pos=4,cex=1)
dev.off()


pdf("plot-sterne-combined-plus.pdf",height=8,width=6)
par(mar=c(5,.1,.5,.1))
plot(c(log(.25),log(3)),c(0,1),xlab="Hazard Ratio",ylab="",axes=FALSE,type="n")
abline(v=0,col=gray(.5))
axis(1,at=log(c(.33,.5,1,2,3)),c(.3,.5,1,2,3))
points(log(as.numeric(sterne.d[1,2])),1,pch=3)
lines(log(as.numeric(sterne.d[1,3:4])),c(1,1))
points(log(as.numeric(sterne.d[2,2])),.9,pch=3)
lines(log(as.numeric(sterne.d[2,3:4])),c(.9,.9))
points(log(as.numeric(sterne.d[3,2])),.8,pch=3)
lines(log(as.numeric(sterne.d[3,3:4])),c(.8,.8))
points(log(as.numeric(sterne.d[4,2])),.7,pch=3)
lines(log(as.numeric(sterne.d[4,3:4])),c(.7,.7))
text(log(.25),1,"0-100 vs. 101-200", pos=4,cex=1)
text(log(.25),.9,"101-200 vs. 201-300", pos=4,cex=1)
text(log(.25),.8,"201-300 vs. 301-400", pos=4,cex=1)
text(log(.25),.7,"301-400 vs. 401-500", pos=4,cex=1)
points(log(as.numeric(sterne[1,2])),.4,pch=3)
lines(log(as.numeric(sterne[1,3:4])),c(.4,.4))
points(log(as.numeric(sterne[2,2])),.3,pch=3)
lines(log(as.numeric(sterne[2,3:4])),c(.3,.3))
points(log(as.numeric(sterne[3,2])),.2,pch=3)
lines(log(as.numeric(sterne[3,3:4])),c(.2,.2))
points(log(as.numeric(sterne[4,2])),.1,pch=3)
lines(log(as.numeric(sterne[4,3:4])),c(.1,.1))
text(log(.25),.4,"0-100 vs. 101-200", pos=4,cex=1)
text(log(.25),.3,"101-200 vs. 201-300", pos=4,cex=1)
text(log(.25),.2,"201-300 vs. 301-400", pos=4,cex=1)
text(log(.25),.1,"301-400 vs. 401-500", pos=4,cex=1)

points(log(2.04),0.98,pch=3,col=2)
lines(log(c(1.70,2.46)),c(.98,.98),col=2)
points(log(1.34),0.88,pch=3,col=2)
lines(log(c(1.05,1.71)),c(.88,.88),col=2)
points(log(1.25),0.78,pch=3,col=2)
lines(log(c(0.86,1.82)),c(.78,.78),col=2)
points(log(1.01),0.68,pch=3,col=2)
lines(log(c(0.68,1.50)),c(.68,.68),col=2)

points(log(3.35),0.38,pch=3,col=2)
lines(log(c(2.99,3.75)),c(.38,.38),col=2)
points(log(2.21),0.28,pch=3,col=2)
lines(log(c(1.91,2.56)),c(.28,.28),col=2)
points(log(1.34),0.18,pch=3,col=2)
lines(log(c(1.12,1.61)),c(.18,.18),col=2)
points(log(1.09),0.08,pch=3,col=2)
lines(log(c(0.85,1.38)),c(.08,.08),col=2)

dev.off()


#########  Now I'm including some NA-ACCORD plots

load("answers-orig-hernan.Rda")

pdf("plot-na-accord.pdf",height=8,width=6)
par(mar=c(5,.1,.5,.1))
plot(c(log(.25),log(3)),c(0,1),xlab="Hazard Ratio",ylab="",axes=FALSE,type="n")
abline(v=0,col=gray(.5))
axis(1,at=log(c(.33,.5,1,2,3)),c(.3,.5,1,2,3))
points(log(as.numeric(ans.o[1,1])),.75,pch=3)
lines(log(as.numeric(ans.o[1,2:3])),c(.75,.75))
text(log(.25),.75,"<350 vs. <500", pos=4, cex=1)
points(log(as.numeric(ans.o[2,1])),.65,pch=3)
lines(log(as.numeric(ans.o[2,2:3])),c(.65,.65))
text(log(.25),.65,"<500 vs. >=500", pos=4, cex=1)
dev.off()


pdf("plot-na-accord-plus.pdf",height=8,width=6)
par(mar=c(5,.1,.5,.1))
plot(c(log(.25),log(3)),c(0,1),xlab="Hazard Ratio",ylab="",axes=FALSE,type="n")
abline(v=0,col=gray(.5))
axis(1,at=log(c(.33,.5,1,2,3)),c(.3,.5,1,2,3))
points(log(as.numeric(ans.o[1,1])),.75,pch=3)
lines(log(as.numeric(ans.o[1,2:3])),c(.75,.75))
text(log(.25),.75,"<350 vs. <500", pos=4, cex=1)
points(log(as.numeric(ans.o[2,1])),.65,pch=3)
lines(log(as.numeric(ans.o[2,2:3])),c(.65,.65))
text(log(.25),.65,"<500 vs. >=500", pos=4, cex=1)

points(log(1.69),0.73,pch=3,col=2)
lines(log(c(1.26,2.26)),c(0.73,0.73),col=2)
points(log(1.94),0.63,pch=3,col=2)
lines(log(c(1.37,2.79)),c(0.63,0.63),col=2)
dev.off()


pdf("plot-na-accord-combined.pdf",height=8,width=6)
par(mar=c(5,.1,.5,.1))
plot(c(log(.25),log(3)),c(0,1),xlab="Hazard Ratio",ylab="",axes=FALSE,type="n")
abline(v=0,col=gray(.5))
axis(1,at=log(c(.33,.5,1,2,3)),c(.3,.5,1,2,3))
points(log(as.numeric(ans.o[1,1])),.75,pch=3)
lines(log(as.numeric(ans.o[1,2:3])),c(.75,.75))
text(log(.25),.75,"<350 vs. <500", pos=4, cex=1)
points(log(as.numeric(ans.o[2,1])),.65,pch=3)
lines(log(as.numeric(ans.o[2,2:3])),c(.65,.65))
text(log(.25),.65,"<500 vs. >=500", pos=4, cex=1)

axis(1,at=log(c(.33,.5,1,2,3)),c(.3,.5,1,2,3))
points(log(as.numeric(ans.o1[1,1])),.15,pch=3)
lines(log(as.numeric(ans.o1[1,2:3])),c(.15,.15))
text(log(.25),.15,"<350 vs. <500", pos=4, cex=1)
points(log(as.numeric(ans.o1[2,1])),.05,pch=3)
lines(log(as.numeric(ans.o1[2,2:3])),c(.05,.05))
text(log(.25),.075,"<500 vs. >=500", pos=4, cex=1)
dev.off()


#####  Now putting plot-sterne-combined-plus and plot-na-accord-plus together into one Figure 2.
pdf("figure2-combined.pdf",height=8,width=12)
par(mfrow=c(1,2),mar=c(5,.1,.5,.1),xpd=NA)
plot(c(log(.25),log(3)),c(0,1.05),xlab="Hazard Ratio, MI Approach",ylab="",axes=FALSE,type="n",cex.lab=1.2)
lines(c(0,0),c(-.05,.45),col=gray(.7))
lines(c(0,0),c(.55,1.04),col=gray(.7))
#abline(v=0,col=gray(.5))
axis(1,at=log(c(.33,.5,1,2,3)),c(.3,.5,1,2,3))
points(log(as.numeric(sterne.d[1,2])),1,pch=3)
lines(log(as.numeric(sterne.d[1,3:4])),c(1,1))
points(log(as.numeric(sterne.d[2,2])),.9,pch=3)
lines(log(as.numeric(sterne.d[2,3:4])),c(.9,.9))
points(log(as.numeric(sterne.d[3,2])),.8,pch=3)
lines(log(as.numeric(sterne.d[3,3:4])),c(.8,.8))
points(log(as.numeric(sterne.d[4,2])),.7,pch=3)
lines(log(as.numeric(sterne.d[4,3:4])),c(.7,.7))
text(log(.25),1,"0-100 vs. 101-200", pos=4,cex=1)
text(log(.25),.9,"101-200 vs. 201-300", pos=4,cex=1)
text(log(.25),.8,"201-300 vs. 301-400", pos=4,cex=1)
text(log(.25),.7,"301-400 vs. 401-500", pos=4,cex=1)
points(log(as.numeric(sterne[1,2])),.4,pch=3)
lines(log(as.numeric(sterne[1,3:4])),c(.4,.4))
points(log(as.numeric(sterne[2,2])),.3,pch=3)
lines(log(as.numeric(sterne[2,3:4])),c(.3,.3))
points(log(as.numeric(sterne[3,2])),.2,pch=3)
lines(log(as.numeric(sterne[3,3:4])),c(.2,.2))
points(log(as.numeric(sterne[4,2])),.1,pch=3)
lines(log(as.numeric(sterne[4,3:4])),c(.1,.1))
text(log(.25),.4,"0-100 vs. 101-200", pos=4,cex=1)
text(log(.25),.3,"101-200 vs. 201-300", pos=4,cex=1)
text(log(.25),.2,"201-300 vs. 301-400", pos=4,cex=1)
text(log(.25),.1,"301-400 vs. 401-500", pos=4,cex=1)

points(log(2.04),0.98,pch=3,col=2)
lines(log(c(1.70,2.46)),c(.98,.98),col=2)
points(log(1.34),0.88,pch=3,col=2)
lines(log(c(1.05,1.71)),c(.88,.88),col=2)
points(log(1.25),0.78,pch=3,col=2)
lines(log(c(0.86,1.82)),c(.78,.78),col=2)
points(log(1.01),0.68,pch=3,col=2)
lines(log(c(0.68,1.50)),c(.68,.68),col=2)

points(log(3.35),0.38,pch=3,col=2)
lines(log(c(2.99,3.75)),c(.38,.38),col=2)
points(log(2.21),0.28,pch=3,col=2)
lines(log(c(1.91,2.56)),c(.28,.28),col=2)
points(log(1.34),0.18,pch=3,col=2)
lines(log(c(1.12,1.61)),c(.18,.18),col=2)
points(log(1.09),0.08,pch=3,col=2)
lines(log(c(0.85,1.38)),c(.08,.08),col=2)

#par(mar=c(5,.1,.5,.1))
plot(c(log(.25),log(3)),c(0,1.05),xlab="Hazard Ratio, dMSM Approach",ylab="",axes=FALSE,type="n",cex.lab=1.2)
lines(c(0,0),c(-.05,.45),col=gray(.7))
lines(c(0,0),c(.55,1.04),col=gray(.7))
#abline(v=0,col=gray(.7))
axis(1,at=log(c(.33,.5,1,2,3)),c(.3,.5,1,2,3))
points(log(as.numeric(ans.o[1,1])),.75,pch=3)
lines(log(as.numeric(ans.o[1,2:3])),c(.75,.75))
text(log(.25),.75,"<350 vs. <500", pos=4, cex=1)
points(log(as.numeric(ans.o[2,1])),.65,pch=3)
lines(log(as.numeric(ans.o[2,2:3])),c(.65,.65))
text(log(.25),.65,"<500 vs. >=500", pos=4, cex=1)

points(log(1.69),0.73,pch=3,col=2)
lines(log(c(1.26,2.26)),c(0.73,0.73),col=2)
points(log(1.94),0.63,pch=3,col=2)
lines(log(c(1.37,2.79)),c(0.63,0.63),col=2)


text(log(.25),1.07,"Death",cex=1.3)
text(log(.25),.5,"AIDS or Death",cex=1.3)

dev.off()


#######  Now making consistent with NA-ACCORD basic analysis
# 
# do not add time
# no administrative censoring
# do not exclude deaths
# do not exclude IDU
# no covariates
# length of CD4 window=6
# no ltfu weighting
# discretize time
# no stratifying


set.seed(20130620)
library(rms)
sterne.d.100b<-with(d,hr.compute.lt(lower=  -1,upper=200,threshold.cd4=100,n.impute=20,cd4=cd4,t.event=t.death,event=death,par.ests=par.ests.d,discrete=TRUE)) 
sterne.d.200b<-with(d,hr.compute.lt(lower=100,upper=300,threshold.cd4=200,n.impute=20,cd4=cd4,t.event=t.death,event=death,par.ests=par.ests.d,discrete=TRUE))
sterne.d.300b<-with(d,hr.compute.lt(lower=200,upper=400,threshold.cd4=300,n.impute=20,cd4=cd4,t.event=t.death,event=death,par.ests=par.ests.d,discrete=TRUE))
sterne.d.400b<-with(d,hr.compute.lt(lower=300,upper=500,threshold.cd4=400,n.impute=20,cd4=cd4,t.event=t.death,event=death,par.ests=par.ests.d,discrete=TRUE))
sterne.d.b<-rbind(sterne.d.100b,sterne.d.200b,sterne.d.300b,sterne.d.400b)


set.seed(20130620)
library(rms)
sterne.100b<-with(d,hr.compute.lt(lower=  -1,upper=200,threshold.cd4=100,n.impute=20,cd4=cd4,t.event=t.event,event=event,par.ests=par.ests.d,discrete=TRUE)) 
sterne.200b<-with(d,hr.compute.lt(lower=100,upper=300,threshold.cd4=200,n.impute=20,cd4=cd4,t.event=t.event,event=event,par.ests=par.ests.d,discrete=TRUE))
sterne.300b<-with(d,hr.compute.lt(lower=200,upper=400,threshold.cd4=300,n.impute=20,cd4=cd4,t.event=t.event,event=event,par.ests=par.ests.d,discrete=TRUE))
sterne.400b<-with(d,hr.compute.lt(lower=300,upper=500,threshold.cd4=400,n.impute=20,cd4=cd4,t.event=t.event,event=event,par.ests=par.ests.d,discrete=TRUE))
sterne.b<-rbind(sterne.100b,sterne.200b,sterne.300b,sterne.400b)

save(sterne.b,sterne.d.b,file="basic-sterne-results.Rda")


mi.answers<-function(filename){  
  load(paste(filename,".Rda",sep=""))
  est<-ests
  hr<-est[1]
  lower<-est[2]
  upper<-est[3]
  type<-strsplit(filename,"-")[[1]][1]
  type.long<-ifelse(type=="basic","Basic, ADE/death",
                    ifelse(type=="censor6","Censor after 6 years",
                           ifelse(type=="exdeath","Exclude deaths w/in 2 wks of ART",
                                  ifelse(type=="exIDU","Exclude IDU",
                                         ifelse(type=="cov","Adjust for sex, baseline age and CD4",
                                                ifelse(type=="lost","Account for LTFU with weights",
                                                       ifelse(type=="strat","Stratify by site and year",
                                                              ifelse(type=="death","Basic, death",
                                                                     ifelse(type=="add2","Add 2 mos. to those w/out death",type)))))))))
  stuff<-c(hr,lower,upper,u.limit,l.limit,type,type.long)
  return(stuff)
}


ans.d.b<-rbind(mi.answers("death-200vs100"),mi.answers("death-300vs200"),mi.answers("death-400vs300"),
               mi.answers("death-500vs400"))

pdf("plot-comparison-death.pdf",height=7,width=6)
par(mar=c(5,.1,.5,.1))
plot(c(log(.25),log(3)),c(0,.9),xlab="Hazard Ratio",ylab="",axes=FALSE,type="n")
abline(v=0,col=gray(.5))
axis(1,at=log(c(.33,.5,1,2,3)),c(.3,.5,1,2,3))
points(log(as.numeric(sterne.d.b[1,2])),.8,pch=3)
lines(log(as.numeric(sterne.d.b[1,3:4])),c(.8,.8))
text(log(.25),.8,"200 vs. 100", pos=4, cex=1)
points(log(as.numeric(sterne.d.b[2,2])),.6,pch=3)
lines(log(as.numeric(sterne.d.b[2,3:4])),c(.6,.6))
text(log(.25),.6,"300 vs. 200", pos=4, cex=1)
points(log(as.numeric(sterne.d.b[3,2])),.4,pch=3)
lines(log(as.numeric(sterne.d.b[3,3:4])),c(.4,.4))
text(log(.25),.4,"400 vs. 300", pos=4, cex=1)
points(log(as.numeric(sterne.d.b[4,2])),.2,pch=3)
lines(log(as.numeric(sterne.d.b[4,3:4])),c(.2,.2))
text(log(.25),.2,"500 vs. 400", pos=4, cex=1)
points(log(as.numeric(ans.d.b[1,1])),.75,pch=3,col=4)
lines(log(as.numeric(ans.d.b[1,2:3])),c(.75,.75),col=4)
points(log(as.numeric(ans.d.b[2,1])),.55,pch=3,col=4)
lines(log(as.numeric(ans.d.b[2,2:3])),c(.55,.55),col=4)
points(log(as.numeric(ans.d.b[3,1])),.35,pch=3,col=4)
lines(log(as.numeric(ans.d.b[3,2:3])),c(.35,.35),col=4)
points(log(as.numeric(ans.d.b[4,1])),.15,pch=3,col=4)
lines(log(as.numeric(ans.d.b[4,2:3])),c(.15,.15),col=4)
dev.off()


ans.b<-rbind(mi.answers("basic-200vs100"),mi.answers("basic-300vs200"),mi.answers("basic-400vs300"),
             mi.answers("basic-500vs400"))

pdf("plot-comparison.pdf",height=7,width=6)
par(mar=c(5,.1,.5,.1))
plot(c(log(.25),log(3)),c(0,.9),xlab="Hazard Ratio",ylab="",axes=FALSE,type="n")
abline(v=0,col=gray(.5))
axis(1,at=log(c(.33,.5,1,2,3)),c(.3,.5,1,2,3))
points(log(as.numeric(sterne.b[1,2])),.8,pch=3)
lines(log(as.numeric(sterne.b[1,3:4])),c(.8,.8))
text(log(.25),.8,"200 vs. 100", pos=4, cex=1)
points(log(as.numeric(sterne.b[2,2])),.6,pch=3)
lines(log(as.numeric(sterne.b[2,3:4])),c(.6,.6))
text(log(.25),.6,"300 vs. 200", pos=4, cex=1)
points(log(as.numeric(sterne.b[3,2])),.4,pch=3)
lines(log(as.numeric(sterne.b[3,3:4])),c(.4,.4))
text(log(.25),.4,"400 vs. 300", pos=4, cex=1)
points(log(as.numeric(sterne.b[4,2])),.2,pch=3)
lines(log(as.numeric(sterne.b[4,3:4])),c(.2,.2))
text(log(.25),.2,"500 vs. 400", pos=4, cex=1)
points(log(as.numeric(ans.b[1,1])),.75,pch=3,col=4)
lines(log(as.numeric(ans.b[1,2:3])),c(.75,.75),col=4)
points(log(as.numeric(ans.b[2,1])),.55,pch=3,col=4)
lines(log(as.numeric(ans.b[2,2:3])),c(.55,.55),col=4)
points(log(as.numeric(ans.b[3,1])),.35,pch=3,col=4)
lines(log(as.numeric(ans.b[3,2:3])),c(.35,.35),col=4)
points(log(as.numeric(ans.b[4,1])),.15,pch=3,col=4)
lines(log(as.numeric(ans.b[4,2:3])),c(.15,.15),col=4)
dev.off()


######  Sensitivity analyses

##### Basic, discrete time
sterne.d.b

##### Basic, continuous time
set.seed(20130620)
sterne.d.100cont<-with(d,hr.compute.lt(lower=  -1,upper=200,threshold.cd4=100,n.impute=20,cd4=cd4,t.event=t.death,event=death,par.ests=par.ests.d)) 
sterne.d.200cont<-with(d,hr.compute.lt(lower=100,upper=300,threshold.cd4=200,n.impute=20,cd4=cd4,t.event=t.death,event=death,par.ests=par.ests.d))
sterne.d.300cont<-with(d,hr.compute.lt(lower=200,upper=400,threshold.cd4=300,n.impute=20,cd4=cd4,t.event=t.death,event=death,par.ests=par.ests.d))
sterne.d.400cont<-with(d,hr.compute.lt(lower=300,upper=500,threshold.cd4=400,n.impute=20,cd4=cd4,t.event=t.death,event=death,par.ests=par.ests.d))
sterne.d.cont<-rbind(sterne.d.100cont,sterne.d.200cont,sterne.d.300cont,sterne.d.400cont)

##### Basic, censor for visit gap > 1 year
set.seed(20130620)
sterne.d.100gap1yearltfu<-with(d.gap1year.ltfu,hr.compute.lt(lower=  -1,upper=200,threshold.cd4=100,n.impute=20,cd4=cd4,t.event=t.death,event=death,par.ests=par.ests.d)) 
sterne.d.200gap1yearltfu<-with(d.gap1year.ltfu,hr.compute.lt(lower=100,upper=300,threshold.cd4=200,n.impute=20,cd4=cd4,t.event=t.death,event=death,par.ests=par.ests.d))
sterne.d.300gap1yearltfu<-with(d.gap1year.ltfu,hr.compute.lt(lower=200,upper=400,threshold.cd4=300,n.impute=20,cd4=cd4,t.event=t.death,event=death,par.ests=par.ests.d))
sterne.d.400gap1yearltfu<-with(d.gap1year.ltfu,hr.compute.lt(lower=300,upper=500,threshold.cd4=400,n.impute=20,cd4=cd4,t.event=t.death,event=death,par.ests=par.ests.d))
sterne.d.gap1yearltfu<-rbind(sterne.d.100gap1yearltfu,sterne.d.200gap1yearltfu,sterne.d.300gap1yearltfu,sterne.d.400gap1yearltfu)

##### Basic, continuous time, truncate at 6 years
set.seed(20130620)
sterne.d.100trunc6<-with(d,hr.compute.lt(lower=  -1,upper=200,threshold.cd4=100,n.impute=20,cd4=cd4,t.event=t.death,event=death,par.ests=par.ests.d,truncate=6)) 
sterne.d.200trunc6<-with(d,hr.compute.lt(lower=100,upper=300,threshold.cd4=200,n.impute=20,cd4=cd4,t.event=t.death,event=death,par.ests=par.ests.d,truncate=6))
sterne.d.300trunc6<-with(d,hr.compute.lt(lower=200,upper=400,threshold.cd4=300,n.impute=20,cd4=cd4,t.event=t.death,event=death,par.ests=par.ests.d,truncate=6))
sterne.d.400trunc6<-with(d,hr.compute.lt(lower=300,upper=500,threshold.cd4=400,n.impute=20,cd4=cd4,t.event=t.death,event=death,par.ests=par.ests.d,truncate=6))
sterne.d.trunc6<-rbind(sterne.d.100trunc6,sterne.d.200trunc6,sterne.d.300trunc6,sterne.d.400trunc6)

##### Basic, continuous time, add two months
set.seed(20130620)
sterne.d.100add2<-with(d.add2mo,hr.compute.lt(lower=  -1,upper=200,threshold.cd4=100,n.impute=20,cd4=cd4,t.event=t.death,event=death,par.ests=par.ests.d)) 
sterne.d.200add2<-with(d.add2mo,hr.compute.lt(lower=100,upper=300,threshold.cd4=200,n.impute=20,cd4=cd4,t.event=t.death,event=death,par.ests=par.ests.d))
sterne.d.300add2<-with(d.add2mo,hr.compute.lt(lower=200,upper=400,threshold.cd4=300,n.impute=20,cd4=cd4,t.event=t.death,event=death,par.ests=par.ests.d))
sterne.d.400add2<-with(d.add2mo,hr.compute.lt(lower=300,upper=500,threshold.cd4=400,n.impute=20,cd4=cd4,t.event=t.death,event=death,par.ests=par.ests.d))
sterne.d.add2<-rbind(sterne.d.100add2,sterne.d.200add2,sterne.d.300add2,sterne.d.400add2)

##### Basic, continuous time, exclude IDU
set.seed(20130620)
sterne.d.100exidu<-with(d.n.idu,hr.compute.lt(lower=  -1,upper=200,threshold.cd4=100,n.impute=20,cd4=cd4,t.event=t.death,event=death,par.ests=par.ests.d)) 
sterne.d.200exidu<-with(d.n.idu,hr.compute.lt(lower=100,upper=300,threshold.cd4=200,n.impute=20,cd4=cd4,t.event=t.death,event=death,par.ests=par.ests.d))
sterne.d.300exidu<-with(d.n.idu,hr.compute.lt(lower=200,upper=400,threshold.cd4=300,n.impute=20,cd4=cd4,t.event=t.death,event=death,par.ests=par.ests.d))
sterne.d.400exidu<-with(d.n.idu,hr.compute.lt(lower=300,upper=500,threshold.cd4=400,n.impute=20,cd4=cd4,t.event=t.death,event=death,par.ests=par.ests.d))
sterne.d.exidu<-rbind(sterne.d.100exidu,sterne.d.200exidu,sterne.d.300exidu,sterne.d.400exidu)

##### Basic, excluding deaths in first 2 weeks of HAART
set.seed(20130620)
sterne.d.100exdeath<-with(d.exc.deaths2wks,hr.compute.lt(lower=  -1,upper=200,threshold.cd4=100,n.impute=20,cd4=cd4,t.event=t.death,event=death,par.ests=par.ests.d)) 
sterne.d.200exdeath<-with(d.exc.deaths2wks,hr.compute.lt(lower=100,upper=300,threshold.cd4=200,n.impute=20,cd4=cd4,t.event=t.death,event=death,par.ests=par.ests.d))
sterne.d.300exdeath<-with(d.exc.deaths2wks,hr.compute.lt(lower=200,upper=400,threshold.cd4=300,n.impute=20,cd4=cd4,t.event=t.death,event=death,par.ests=par.ests.d))
sterne.d.400exdeath<-with(d.exc.deaths2wks,hr.compute.lt(lower=300,upper=500,threshold.cd4=400,n.impute=20,cd4=cd4,t.event=t.death,event=death,par.ests=par.ests.d))
sterne.d.exdeath<-rbind(sterne.d.100exdeath,sterne.d.200exdeath,sterne.d.300exdeath,sterne.d.400exdeath)

##### Basic, 3 month CD4 window
set.seed(20130620)
sterne.d.100cd43<-with(d,hr.compute.lt(lower=  -1,upper=200,threshold.cd4=100,n.impute=20,cd4=cd4,t.event=t.death,event=death,par.ests=par.ests.d,cd4.3=TRUE,age.haart=age.haart,age.cd4=age.cd4)) 
sterne.d.200cd43<-with(d,hr.compute.lt(lower=100,upper=300,threshold.cd4=200,n.impute=20,cd4=cd4,t.event=t.death,event=death,par.ests=par.ests.d,cd4.3=TRUE,age.haart=age.haart,age.cd4=age.cd4))
sterne.d.300cd43<-with(d,hr.compute.lt(lower=200,upper=400,threshold.cd4=300,n.impute=20,cd4=cd4,t.event=t.death,event=death,par.ests=par.ests.d,cd4.3=TRUE,age.haart=age.haart,age.cd4=age.cd4))
sterne.d.400cd43<-with(d,hr.compute.lt(lower=300,upper=500,threshold.cd4=400,n.impute=20,cd4=cd4,t.event=t.death,event=death,par.ests=par.ests.d,cd4.3=TRUE,age.haart=age.haart,age.cd4=age.cd4))
sterne.d.cd43<-rbind(sterne.d.100cd43,sterne.d.200cd43,sterne.d.300cd43,sterne.d.400cd43)


##### Basic, discrete time
sterne.b

##### Basic, continuous time
set.seed(20130620)
sterne.100cont<-with(d,hr.compute.lt(lower=  -1,upper=200,threshold.cd4=100,n.impute=20,cd4=cd4,t.event=t.event,event=event,par.ests=par.ests.d)) 
sterne.200cont<-with(d,hr.compute.lt(lower=100,upper=300,threshold.cd4=200,n.impute=20,cd4=cd4,t.event=t.event,event=event,par.ests=par.ests.d))
sterne.300cont<-with(d,hr.compute.lt(lower=200,upper=400,threshold.cd4=300,n.impute=20,cd4=cd4,t.event=t.event,event=event,par.ests=par.ests.d))
sterne.400cont<-with(d,hr.compute.lt(lower=300,upper=500,threshold.cd4=400,n.impute=20,cd4=cd4,t.event=t.event,event=event,par.ests=par.ests.d))
sterne.cont<-rbind(sterne.100cont,sterne.200cont,sterne.300cont,sterne.400cont)

##### Basic, continuous time, truncate at 6 years
set.seed(20130620)
sterne.100trunc6<-with(d,hr.compute.lt(lower=  -1,upper=200,threshold.cd4=100,n.impute=20,cd4=cd4,t.event=t.event,event=event,par.ests=par.ests.d,truncate=6)) 
sterne.200trunc6<-with(d,hr.compute.lt(lower=100,upper=300,threshold.cd4=200,n.impute=20,cd4=cd4,t.event=t.event,event=event,par.ests=par.ests.d,truncate=6))
sterne.300trunc6<-with(d,hr.compute.lt(lower=200,upper=400,threshold.cd4=300,n.impute=20,cd4=cd4,t.event=t.event,event=event,par.ests=par.ests.d,truncate=6))
sterne.400trunc6<-with(d,hr.compute.lt(lower=300,upper=500,threshold.cd4=400,n.impute=20,cd4=cd4,t.event=t.event,event=event,par.ests=par.ests.d,truncate=6))
sterne.trunc6<-rbind(sterne.100trunc6,sterne.200trunc6,sterne.300trunc6,sterne.400trunc6)

##### Basic, continuous time, add two months
set.seed(20130620)
sterne.100add2<-with(d.add2mo,hr.compute.lt(lower=  -1,upper=200,threshold.cd4=100,n.impute=20,cd4=cd4,t.event=t.event,event=event,par.ests=par.ests.d)) 
sterne.200add2<-with(d.add2mo,hr.compute.lt(lower=100,upper=300,threshold.cd4=200,n.impute=20,cd4=cd4,t.event=t.event,event=event,par.ests=par.ests.d))
sterne.300add2<-with(d.add2mo,hr.compute.lt(lower=200,upper=400,threshold.cd4=300,n.impute=20,cd4=cd4,t.event=t.event,event=event,par.ests=par.ests.d))
sterne.400add2<-with(d.add2mo,hr.compute.lt(lower=300,upper=500,threshold.cd4=400,n.impute=20,cd4=cd4,t.event=t.event,event=event,par.ests=par.ests.d))
sterne.add2<-rbind(sterne.100add2,sterne.200add2,sterne.300add2,sterne.400add2)

##### Basic, continuous time, exclude IDU
set.seed(20130620)
sterne.100exidu<-with(d.n.idu,hr.compute.lt(lower=  -1,upper=200,threshold.cd4=100,n.impute=20,cd4=cd4,t.event=t.event,event=event,par.ests=par.ests.d)) 
sterne.200exidu<-with(d.n.idu,hr.compute.lt(lower=100,upper=300,threshold.cd4=200,n.impute=20,cd4=cd4,t.event=t.event,event=event,par.ests=par.ests.d))
sterne.300exidu<-with(d.n.idu,hr.compute.lt(lower=200,upper=400,threshold.cd4=300,n.impute=20,cd4=cd4,t.event=t.event,event=event,par.ests=par.ests.d))
sterne.400exidu<-with(d.n.idu,hr.compute.lt(lower=300,upper=500,threshold.cd4=400,n.impute=20,cd4=cd4,t.event=t.event,event=event,par.ests=par.ests.d))
sterne.exidu<-rbind(sterne.100exidu,sterne.200exidu,sterne.300exidu,sterne.400exidu)

##### Basic, excluding deaths in first 2 weeks of HAART
set.seed(20130620)
sterne.100exdeath<-with(d.exc.deaths2wks,hr.compute.lt(lower=  -1,upper=200,threshold.cd4=100,n.impute=20,cd4=cd4,t.event=t.event,event=event,par.ests=par.ests.d)) 
sterne.200exdeath<-with(d.exc.deaths2wks,hr.compute.lt(lower=100,upper=300,threshold.cd4=200,n.impute=20,cd4=cd4,t.event=t.event,event=event,par.ests=par.ests.d))
sterne.300exdeath<-with(d.exc.deaths2wks,hr.compute.lt(lower=200,upper=400,threshold.cd4=300,n.impute=20,cd4=cd4,t.event=t.event,event=event,par.ests=par.ests.d))
sterne.400exdeath<-with(d.exc.deaths2wks,hr.compute.lt(lower=300,upper=500,threshold.cd4=400,n.impute=20,cd4=cd4,t.event=t.event,event=event,par.ests=par.ests.d))
sterne.exdeath<-rbind(sterne.100exdeath,sterne.200exdeath,sterne.300exdeath,sterne.400exdeath)

##### Basic, 3 month CD4 window
set.seed(20130620)
sterne.100cd43<-with(d,hr.compute.lt(lower=  -1,upper=200,threshold.cd4=100,n.impute=20,cd4=cd4,t.event=t.event,event=event,par.ests=par.ests.d,cd4.3=TRUE,age.haart=age.haart,age.cd4=age.cd4)) 
sterne.200cd43<-with(d,hr.compute.lt(lower=100,upper=300,threshold.cd4=200,n.impute=20,cd4=cd4,t.event=t.event,event=event,par.ests=par.ests.d,cd4.3=TRUE,age.haart=age.haart,age.cd4=age.cd4))
sterne.300cd43<-with(d,hr.compute.lt(lower=200,upper=400,threshold.cd4=300,n.impute=20,cd4=cd4,t.event=t.event,event=event,par.ests=par.ests.d,cd4.3=TRUE,age.haart=age.haart,age.cd4=age.cd4))
sterne.400cd43<-with(d,hr.compute.lt(lower=300,upper=500,threshold.cd4=400,n.impute=20,cd4=cd4,t.event=t.event,event=event,par.ests=par.ests.d,cd4.3=TRUE,age.haart=age.haart,age.cd4=age.cd4))
sterne.cd43<-rbind(sterne.100cd43,sterne.200cd43,sterne.300cd43,sterne.400cd43)


pdf("cole-results.pdf",width=9,height=4)

n.cat<-8
y.pos<-1-c(1:n.cat)/(n.cat+1)
ulim<-200
text.pos<- -1
xlimits=c(-1.1,1.1)

par(mfrow=c(1,5), mar=c(5,.5,3,.2))

plot(c(-1,1),c(0,1),type="n",xlab="",ylab="",axes=FALSE)
text(text.pos,y.pos[1],"Basic, death",pos=4,cex=.9)
text(text.pos,y.pos[2],"Exclude deaths w/in 2 wks of ART",pos=4,cex=.9)
text(text.pos,y.pos[3],"Add 2 mos. to those w/out death",pos=4,cex=.9)
text(text.pos,y.pos[4],"Censor after 6 years",pos=4,cex=.9)
text(text.pos,y.pos[5],"Exclude IDU",pos=4,cex=.9)
text(text.pos,y.pos[6],"3 month CD4 window",pos=4,cex=.9)
text(text.pos,y.pos[7],"Censor if visit gap > 1 year",pos=4,cex=.9) 
text(text.pos,y.pos[8],"Discretize time",pos=4,cex=.9)

plot(xlimits,c(0,1),type="n",xlab="Hazard Ratio",ylab="",axes=FALSE)
title("100 vs. 200 (ref)")
axis(1,at=c(log(0.33),log(0.5),0,log(2),log(3)),c(0.3,0.5,1,2,3))
abline(v=0,col=gray(.8))
points(log(as.numeric(c(sterne.d.cont[1,2],sterne.d.exdeath[1,2],sterne.d.add2[1,2],sterne.d.trunc6[1,2],sterne.d.exidu[1,2],
                        sterne.d.cd43[1,2],sterne.d.gap1yearltfu[1,2],sterne.d.b[1,2]))),y.pos,pch=3)
lines(log(as.numeric(sterne.d.cont[1,3:4])),rep(y.pos[1],2))
lines(log(as.numeric(sterne.d.exdeath[1,3:4])),rep(y.pos[2],2))
lines(log(as.numeric(sterne.d.add2[1,3:4])),rep(y.pos[3],2))
lines(log(as.numeric(sterne.d.trunc6[1,3:4])),rep(y.pos[4],2))
lines(log(as.numeric(sterne.d.exidu[1,3:4])),rep(y.pos[5],2))
lines(log(as.numeric(sterne.d.cd43[1,3:4])),rep(y.pos[6],2))
lines(log(as.numeric(sterne.d.gap1yearltfu[1,3:4])),rep(y.pos[7],2))
lines(log(as.numeric(sterne.d.b[1,3:4])),rep(y.pos[8],2))

plot(xlimits,c(0,1),type="n",xlab="Hazard Ratio",ylab="",axes=FALSE)
title("200 vs. 300 (ref)")
axis(1,at=c(log(0.33),log(0.5),0,log(2),log(3)),c(0.3,0.5,1,2,3))
abline(v=0,col=gray(.8))
points(log(as.numeric(c(sterne.d.cont[2,2],sterne.d.exdeath[2,2],sterne.d.add2[2,2],sterne.d.trunc6[2,2],sterne.d.exidu[2,2],
                        sterne.d.cd43[2,2],sterne.d.gap1yearltfu[2,2],sterne.d.b[2,2]))),y.pos,pch=3)
lines(log(as.numeric(sterne.d.cont[2,3:4])),rep(y.pos[1],2))
lines(log(as.numeric(sterne.d.exdeath[2,3:4])),rep(y.pos[2],2))
lines(log(as.numeric(sterne.d.add2[2,3:4])),rep(y.pos[3],2))
lines(log(as.numeric(sterne.d.trunc6[2,3:4])),rep(y.pos[4],2))
lines(log(as.numeric(sterne.d.exidu[2,3:4])),rep(y.pos[5],2))
lines(log(as.numeric(sterne.d.cd43[2,3:4])),rep(y.pos[6],2))
lines(log(as.numeric(sterne.d.gap1yearltfu[2,3:4])),rep(y.pos[7],2))
lines(log(as.numeric(sterne.d.b[2,3:4])),rep(y.pos[8],2))

plot(xlimits,c(0,1),type="n",xlab="Hazard Ratio",ylab="",axes=FALSE)
title("300 vs. 400 (ref)")
axis(1,at=c(log(0.33),log(0.5),0,log(2),log(3)),c(0.3,0.5,1,2,3))
abline(v=0,col=gray(.8))
points(log(as.numeric(c(sterne.d.cont[3,2],sterne.d.exdeath[3,2],sterne.d.add2[3,2],sterne.d.trunc6[3,2],sterne.d.exidu[3,2],
                        sterne.d.cd43[3,2],sterne.d.gap1yearltfu[3,2],sterne.d.b[3,2]))),y.pos,pch=3)
lines(log(as.numeric(sterne.d.cont[3,3:4])),rep(y.pos[1],2))
lines(log(as.numeric(sterne.d.exdeath[3,3:4])),rep(y.pos[2],2))
lines(log(as.numeric(sterne.d.add2[3,3:4])),rep(y.pos[3],2))
lines(log(as.numeric(sterne.d.trunc6[3,3:4])),rep(y.pos[4],2))
lines(log(as.numeric(sterne.d.exidu[3,3:4])),rep(y.pos[5],2))
lines(log(as.numeric(sterne.d.cd43[3,3:4])),rep(y.pos[6],2))
lines(log(as.numeric(sterne.d.gap1yearltfu[3,3:4])),rep(y.pos[7],2))
lines(log(as.numeric(sterne.d.b[3,3:4])),rep(y.pos[8],2))

plot(xlimits,c(0,1),type="n",xlab="Hazard Ratio",ylab="",axes=FALSE)
title("400 vs. 500 (ref)")
axis(1,at=c(log(0.33),log(0.5),0,log(2),log(3)),c(0.3,0.5,1,2,3))
abline(v=0,col=gray(.8))
points(log(as.numeric(c(sterne.d.cont[4,2],sterne.d.exdeath[4,2],sterne.d.add2[4,2],sterne.d.trunc6[4,2],sterne.d.exidu[4,2],
                        sterne.d.cd43[4,2],sterne.d.gap1yearltfu[4,2],sterne.d.b[4,2]))),y.pos,pch=3)
lines(log(as.numeric(sterne.d.cont[4,3:4])),rep(y.pos[1],2))
lines(log(as.numeric(sterne.d.exdeath[4,3:4])),rep(y.pos[2],2))
lines(log(as.numeric(sterne.d.add2[4,3:4])),rep(y.pos[3],2))
lines(log(as.numeric(sterne.d.trunc6[4,3:4])),rep(y.pos[4],2))
lines(log(as.numeric(sterne.d.exidu[4,3:4])),rep(y.pos[5],2))
lines(log(as.numeric(sterne.d.cd43[4,3:4])),rep(y.pos[6],2))
lines(log(as.numeric(sterne.d.gap1yearltfu[4,3:4])),rep(y.pos[7],2))
lines(log(as.numeric(sterne.d.b[4,3:4])),rep(y.pos[8],2))

dev.off()


pdf("cole-results1.pdf",width=9,height=4)

n.cat<-7
y.pos<-1-c(1:n.cat)/(n.cat+1)
ulim<-200
text.pos<- -1
xlimits=c(-1.1,1.1)

par(mfrow=c(1,5), mar=c(5,.5,3,.2))

plot(c(-1,1),c(0,1),type="n",xlab="",ylab="",axes=FALSE)
text(text.pos,y.pos[1],"Basic, ADE/death",pos=4,cex=.9)
text(text.pos,y.pos[2],"Exclude deaths w/in 2 wks of ART",pos=4,cex=.9)
text(text.pos,y.pos[3],"Add 2 mos. to those w/out death",pos=4,cex=.9)
text(text.pos,y.pos[4],"Censor after 6 years",pos=4,cex=.9)
text(text.pos,y.pos[5],"Discretize time",pos=4,cex=.9)
text(text.pos,y.pos[6],"3 month CD4 window",pos=4,cex=.9)
text(text.pos,y.pos[7],"Exclude IDU",pos=4,cex=.9)

plot(xlimits,c(0,1),type="n",xlab="Hazard Ratio",ylab="",axes=FALSE)
title("100 vs. 200 (ref)")
axis(1,at=c(log(0.33),log(0.5),0,log(2),log(3)),c(0.3,0.5,1,2,3))
abline(v=0,col=gray(.8))
points(log(as.numeric(c(sterne.cont[1,2],sterne.exdeath[1,2],sterne.add2[1,2],sterne.trunc6[1,2],sterne.b[1,2],
                        sterne.cd43[1,2],sterne.exidu[1,2]))),y.pos,pch=3)
lines(log(as.numeric(sterne.cont[1,3:4])),rep(y.pos[1],2))
lines(log(as.numeric(sterne.exdeath[1,3:4])),rep(y.pos[2],2))
lines(log(as.numeric(sterne.add2[1,3:4])),rep(y.pos[3],2))
lines(log(as.numeric(sterne.trunc6[1,3:4])),rep(y.pos[4],2))
lines(log(as.numeric(sterne.b[1,3:4])),rep(y.pos[5],2))
lines(log(as.numeric(sterne.cd43[1,3:4])),rep(y.pos[6],2))
lines(log(as.numeric(sterne.exidu[1,3:4])),rep(y.pos[7],2))

plot(xlimits,c(0,1),type="n",xlab="Hazard Ratio",ylab="",axes=FALSE)
title("200 vs. 300 (ref)")
axis(1,at=c(log(0.33),log(0.5),0,log(2),log(3)),c(0.3,0.5,1,2,3))
abline(v=0,col=gray(.8))
points(log(as.numeric(c(sterne.cont[2,2],sterne.exdeath[2,2],sterne.add2[2,2],sterne.trunc6[2,2],sterne.b[2,2],
                        sterne.cd43[2,2],sterne.exidu[2,2]))),y.pos,pch=3)
lines(log(as.numeric(sterne.cont[2,3:4])),rep(y.pos[1],2))
lines(log(as.numeric(sterne.exdeath[2,3:4])),rep(y.pos[2],2))
lines(log(as.numeric(sterne.add2[2,3:4])),rep(y.pos[3],2))
lines(log(as.numeric(sterne.trunc6[2,3:4])),rep(y.pos[4],2))
lines(log(as.numeric(sterne.b[2,3:4])),rep(y.pos[5],2))
lines(log(as.numeric(sterne.cd43[2,3:4])),rep(y.pos[6],2))
lines(log(as.numeric(sterne.exidu[2,3:4])),rep(y.pos[7],2))

plot(xlimits,c(0,1),type="n",xlab="Hazard Ratio",ylab="",axes=FALSE)
title("300 vs. 400 (ref)")
axis(1,at=c(log(0.33),log(0.5),0,log(2),log(3)),c(0.3,0.5,1,2,3))
abline(v=0,col=gray(.8))
points(log(as.numeric(c(sterne.cont[3,2],sterne.exdeath[3,2],sterne.add2[3,2],sterne.trunc6[3,2],sterne.b[3,2],
                        sterne.cd43[3,2],sterne.exidu[3,2]))),y.pos,pch=3)
lines(log(as.numeric(sterne.cont[3,3:4])),rep(y.pos[1],2))
lines(log(as.numeric(sterne.exdeath[3,3:4])),rep(y.pos[2],2))
lines(log(as.numeric(sterne.add2[3,3:4])),rep(y.pos[3],2))
lines(log(as.numeric(sterne.trunc6[3,3:4])),rep(y.pos[4],2))
lines(log(as.numeric(sterne.b[3,3:4])),rep(y.pos[5],2))
lines(log(as.numeric(sterne.cd43[3,3:4])),rep(y.pos[6],2))
lines(log(as.numeric(sterne.exidu[3,3:4])),rep(y.pos[7],2))

plot(xlimits,c(0,1),type="n",xlab="Hazard Ratio",ylab="",axes=FALSE)
title("400 vs. 500 (ref)")
axis(1,at=c(log(0.33),log(0.5),0,log(2),log(3)),c(0.3,0.5,1,2,3))
abline(v=0,col=gray(.8))
points(log(as.numeric(c(sterne.cont[4,2],sterne.exdeath[4,2],sterne.add2[4,2],sterne.trunc6[4,2],sterne.b[4,2],
                        sterne.cd43[4,2],sterne.exidu[4,2]))),y.pos,pch=3)
lines(log(as.numeric(sterne.cont[4,3:4])),rep(y.pos[1],2))
lines(log(as.numeric(sterne.exdeath[4,3:4])),rep(y.pos[2],2))
lines(log(as.numeric(sterne.add2[4,3:4])),rep(y.pos[3],2))
lines(log(as.numeric(sterne.trunc6[4,3:4])),rep(y.pos[4],2))
lines(log(as.numeric(sterne.b[4,3:4])),rep(y.pos[5],2))
lines(log(as.numeric(sterne.cd43[4,3:4])),rep(y.pos[6],2))
lines(log(as.numeric(sterne.exidu[4,3:4])),rep(y.pos[7],2))

dev.off()