\documentclass[10pt]{article}
\bibliographystyle{cj}
\usepackage{longtable}        % allows tables to break
\usepackage[pdftex]{lscape}   % allows tables to be landscaped
%\usepackage[pdftex]{graphicx}
\usepackage{pdfpages}
\usepackage[letterpaper,margin=1in]{geometry}
\usepackage{setspace,relsize}   % needed for latex(describe()), \code
\usepackage{moreverb}           % for verbatimtabinput 
% \usepackage{tabularx}
%\usepackage{colortbl}
%\usepackage{placeins}
%\usepackage[dotinlabels]{titletoc}
%\usepackage{amsmath}
%\usepackage{amsfonts}
\usepackage{amssymb}
\usepackage{fancyhdr}
\usepackage{graphicx, subfigure}
%\usepackage{vmargin}
\usepackage{lscape}
\usepackage{rotating}
\usepackage{endnotes}
\usepackage{color}
\def\linkcol{blue}
%
\usepackage{tabularx} % required in the preamble
%\setpapersize{USletter}

%
\usepackage{tabularx} % required in the preamble
%\setpapersize{USletter}
\begin{document}
%Define the sub directory for placement of graphics
\SweaveOpts{prefix.string=pspdf/nonparm}
%Define all graphics to be 100\% of their width
\setkeys{Gin}{width=1.0\textwidth} %default is 0.8


%\def\printid{1}


<<echo=FALSE, results=hide>>=
#Clear existing data
rm(list=ls())
#----------------------------------#
#      Loading libraries           #
#----------------------------------#
library(foreign)
library(Hmisc)
library(rms)
library(survival)
library(cmprsk)

#---------------------------------------#
#            Functions                  #
#---------------------------------------#
# Function to do ADE and ADE/Death analyses for a given subgroup of subjects
source("modelfun.R")

#------------------------------------#
#           Summary.fun              #
#------------------------------------#
# Creates different summaries for the adjusted models
summary.fun <- function(mod,cov,risk.levels){
   # Comparing CD4's
   if(cov == "go"){
      summ.50 <- summary(mod, artcc_origin_g.factor = "WEST",
                              gender.factor = "Male",
                              cd4_t0_corrected = c(200,50),
                              lrna_t0_corrected = c(5,4),
                              #hemoglobin_t0_corrected=c(12,13),
                              age = c(40,20),
                              pi_based.factor = "No",
                              nnrti_based.factor = "No",
#                               other_based.factor = "No",
                              haartyr = c(2001,1998),
                              risk.factor = risk.levels[1],
                              stagec.factor = "No")
   } else if (cov == "nb"){
      summ.50 <- summary(mod, native.born.factor = "Yes",
                              gender.factor = "Male",
                              cd4_t0_corrected = c(200,50),
                              lrna_t0_corrected = c(5,4),
                              #hemoglobin_t0_corrected=c(12,13),
                              age = c(40,20),
                              pi_based.factor = "No",
                              nnrti_based.factor = "No",
#                               other_based.factor = "No",
                              haartyr = c(2001,1998),
                              risk.factor = risk.levels[1],
                              stagec.factor = "No")
   } else if (cov == "race"){
      summ.50 <- summary(mod, combined.factor = "White",
                              gender.factor = "Male",
                              cd4_t0_corrected = c(200,50),
                              lrna_t0_corrected = c(5,4),
                              #hemoglobin_t0_corrected=c(13,12),
                              age = c(40,20),
                              pi_based.factor = "No",
                              nnrti_based.factor = "No",
#                               other_based.factor = "No",
                              haartyr = c(2001,1998),
                              risk.factor = risk.levels[1],
                              stagec.factor = "No")
   }
   summ.100 <- summary(mod, est.all=FALSE,
                           cd4_t0_corrected = c(200,100))
   summ.350 <- summary(mod, est.all=FALSE,
                            cd4_t0_corrected = c(200,350))
   summ.500 <- summary(mod, est.all=FALSE,
                            cd4_t0_corrected = c(200,500))

   # Comparing log viral loads
   summ.vl.6 <- summary(mod, est.all=FALSE,
                             lrna_t0_corrected = c(5,6))


   # Comparing ages
   summ.age.30 <- summary(mod, est.all=FALSE,
                               age = c(40,30))
   summ.age.50 <- summary(mod, est.all=FALSE,
                               age = c(40,50))
   summ.age.60 <- summary(mod, est.all=FALSE,
                               age = c(40,60))

   # year of HAART start
   summ.haart.2004 <- summary(mod, est.all=FALSE,
                                   haartyr = c(2001,2004))
   summ.haart.2006 <- summary(mod, est.all=FALSE,
                                   haartyr = c(2001,2006))

   return(list(summ.50=summ.50,
               summ.100=summ.100,
               summ.350=summ.350,
               summ.500=summ.500,
               summ.vl.6=summ.vl.6,
               #summ.hg.14=summ.hg.14,
               summ.age.30=summ.age.30,
               summ.age.50=summ.age.50,
               summ.age.60=summ.age.60,
               summ.haart.2004=summ.haart.2004,
               summ.haart.2006=summ.haart.2006))}

   #----------------------------------#
   #            Format.fun            #
   #----------------------------------#
# Formats the summaries into a data frame.  This data frame still needs to be adjusted to incorporate
format.fun <- function(summ.,mod,cov,risk.levels,main.levels){
   mod.orig <- summ.[[1]]
   order.rows <- vector(mode="numeric",length=(length(rownames(mod.orig))/2))
   if(cov == "go"){
      main.levels2 <- paste("artcc_origin_g.factor - ",main.levels[2:length(main.levels)],":",main.levels[1],sep="")
   } else if (cov == "nb"){
      main.levels2 <- paste("native.born.factor - ",main.levels[2:length(main.levels)],":",main.levels[1],sep="")
   } else if (cov == "race"){
      main.levels2 <- paste("combined.factor - ",main.levels[2:length(main.levels)],":",main.levels[1],sep="")
   }
   order.rows[1:(length(main.levels)-1)] <- which(rownames(mod.orig) %in% main.levels2)
   order.rows[length(main.levels):(length(main.levels) + 6)] <-
              c(which(rownames(mod.orig) == "gender.factor - Female:Male"),
                which(rownames(mod.orig) == "cd4_t0_corrected"),
                which(rownames(mod.orig) == "lrna_t0_corrected"),
                which(rownames(mod.orig) == "age"),
                which(rownames(mod.orig) == "pi_based.factor - Yes:No"),
                which(rownames(mod.orig) == "nnrti_based.factor - Yes:No"),
                which(rownames(mod.orig) == "haartyr"))
   risk.levels2 <- paste("risk.factor - ",risk.levels[2:length(risk.levels)],":",risk.levels[1],sep="")
   order.rows[(length(main.levels)+7):(length(main.levels)+6+(length(risk.levels)-1))] <-
               which(rownames(mod.orig) %in% risk.levels2)
   order.rows[length(order.rows)] <- which(rownames(mod.orig) == "stagec.factor - Yes:No")

   df <- round(mod.orig[,c("Effect","Lower 0.95","Upper 0.95")],2)
   ci <- paste("(",df[,"Lower 0.95"],", ",df[,"Upper 0.95"],")",sep="")
   df <- cbind(df[,1],ci)
   df <- df[(order.rows+1),]
   summ.p <- anova(mod)[,"P"]
   summ.p <- ifelse(summ.p < 0.001, "< 0.001", ifelse(summ.p < 0.01 & summ.p >= 0.001,
                    round(summ.p,3), round(summ.p,2)))
   nonlinear.p <- summ.p[(length(summ.p)-1)]
   summ.p <- summ.p[names(summ.p) %nin% c(" Nonlinear","TOTAL NONLINEAR","TOTAL")]
   summ.p <- c(summ.p[1],rep("",times=(length(main.levels)-2)),summ.p[2:9],rep("",times=(length(risk.levels)-2)),
               summ.p[10])
   df <- cbind(df,summ.p)
   colnames(df) <- c("Hazard Ratio","95\\% CI","P")

   rownames.mod.orig <- rownames(mod.orig)
   rownames.mod.orig <- rownames.mod.orig[order.rows]
   rownames(df) <- rownames.mod.orig

   which.cd4 <- which(rownames(df) == "cd4_t0_corrected")
   which.vl <- which(rownames(df) == "lrna_t0_corrected")
   which.age <- which(rownames(df) == "age")
   which.haart <- which(rownames(df) == "haartyr")

      df <- rbind(df[1:which.cd4,],
                  c(round(summ.[[2]][2,4],2),paste("(",round(summ.[[2]][2,6],2),", ",round(summ.[[2]][2,7],2),")",sep=""),""),
                  c(round(summ.[[3]][2,4],2),paste("(",round(summ.[[3]][2,6],2),", ",round(summ.[[3]][2,7],2),")",sep=""),""),
                  c(round(summ.[[4]][2,4],2),paste("(",round(summ.[[4]][2,6],2),", ",round(summ.[[4]][2,7],2),")",sep=""),""),
                  df[(which.cd4+1):which.vl,],
                  c(round(summ.[[5]][2,4],2),paste("(",round(summ.[[5]][2,6],2),", ",round(summ.[[5]][2,7],2),")",sep=""),""),
                  df[(which.vl+1):which.age,],
                  c(round(summ.[[6]][2,4],2),paste("(",round(summ.[[6]][2,6],2),", ",round(summ.[[6]][2,7],2),")",sep=""),""),
                  c(round(summ.[[7]][2,4],2),paste("(",round(summ.[[7]][2,6],2),", ",round(summ.[[7]][2,7],2),")",sep=""),""),
                  c(round(summ.[[8]][2,4],2),paste("(",round(summ.[[8]][2,6],2),", ",round(summ.[[8]][2,7],2),")",sep=""),""),
                  df[(which.age+1):which.haart,],
                  c(round(summ.[[9]][2,4],2),paste("(",round(summ.[[9]][2,6],2),", ",round(summ.[[9]][2,7],2),")",sep=""),""),
                  c(round(summ.[[10]][2,4],2),paste("(",round(summ.[[10]][2,6],2),", ",round(summ.[[10]][2,7],2),")",sep=""),""),
                  df[(which.haart+1):dim(df)[1],])
   if(cov == "go"){
      covariate <- c("Northern Africa and Middle East","Sub-Saharan Africa",
                                "Asia","Latin America")
   } else if (cov == "nb"){
      covariate <- "Non-native born"
   } else if (cov == "race"){
      covariate <- c("Black","Hispanic","Asian","Arab","Indigenous")
   }
   df <- cbind("Covariate"=c(covariate,c("Female","Baseline CD4 (50 vs 200)","Baseline CD4 (100 vs 200)",
                                "Baseline CD4 (350 vs 200)","Baseline CD4 (500 vs 200)",
                                "Baseline log viral load (4 vs 5)","Baseline log viral load (6 vs 5)",
                                "Age (20 vs 40)","Age (30 vs 40)","Age (50 vs 40)","Age (60 vs 40)",
                                "PI-based initial ART","NNRTI-based initial ART",
                                "Year of HAART start (1998 vs 2001)","Year of HAART start (2004 vs 2001)",
                                "Year of HAART start (2006 vs 2001)"),
                                paste("Risk factor -- ",risk.levels[2:length(risk.levels)],sep=""),"Prior ADE"),
                df)

   return(list(df=df,
               nonlinear.p=nonlinear.p))
}

#----------------------------------#
#          Loading data            #
#----------------------------------#
# Baseline data
baseline <- read.dta("baselinedata_for_cathy.dta",convert.factors=FALSE,convert.underscore=FALSE)
# Long format with hemoglobin data
hemoglobin <- read.dta("haemoglobin_for_cathy.dta",convert.factors=FALSE,convert.underscore=FALSE)
origin <- read.dta("info_origin_race_2011.dta",convert.factors=FALSE,convert.underscore=FALSE)
# Long format with CD4 data
cd4.data <- read.dta("t22_measurements_lab_cd4.dta",convert.factors=FALSE,convert.underscore=FALSE)
# Long format with viral load data
vl.data <- read.dta("t24_measurements_lab_rna.dta",convert.factors=FALSE,convert.underscore=FALSE)
# Long format with ADE data
ade.data <- read.dta("t40_aids_events.dta",convert.factors=FALSE,convert.underscore=FALSE)
# Geographic origin data for Vanderbilt subjects
vu.geo.org <- read.csv('region_of_origin_12jan2012.csv',sep=',',header=TRUE,stringsAsFactors=FALSE)
names(vu.geo.org) <- c('patient','origin_g.factor')
vu.geo.org$unique_id <- paste(vu.geo.org$patient,'Vanderbilt',sep='.')
vu.geo.org$origin_g <- ifelse(vu.geo.org$origin_g.factor == 'Europe',0,
                              ifelse(vu.geo.org$origin_g.factor == 'Northern Africa and Middle East',11,
                                     ifelse(vu.geo.org$origin_g.factor == 'Sub-Saharan Africa',12,
                                            ifelse(vu.geo.org$origin_g.factor == 'Asia',20,
                                                   ifelse(vu.geo.org$origin_g.factor == 'Australia, Oceania, and New Zealand',30,
                                                   ifelse(vu.geo.org$origin_g.factor == 'North America',51,
                                                   ifelse(vu.geo.org$origin_g.factor == 'Central and South America',52,
                                                          ifelse(vu.geo.org$origin_g.factor == 'Unknown',99,NA))))))))
vu.geo.org <- vu.geo.org[order(vu.geo.org$unique_id),]

# Event codes
baseline$event1 <- ifelse(baseline$event1 == "", NA, baseline$event1)
baseline$event2 <- ifelse(baseline$event2 == "", NA, baseline$event2)
baseline$event3 <- ifelse(baseline$event3 == "", NA, baseline$event3)
baseline$event4 <- ifelse(baseline$event4 == "", NA, baseline$event4)
baseline$event5 <- ifelse(baseline$event5 == "", NA, baseline$event5)
baseline$event6 <- ifelse(baseline$event6 == "", NA, baseline$event6)
baseline$event7 <- ifelse(baseline$event7 == "", NA, baseline$event7)
baseline$event8 <- ifelse(baseline$event8 == "", NA, baseline$event8)

# For those who have missing event1 values but non-missing event_d1 values, need to check the aids variable
# If aids = 1, then make event1 = "UNK"; otherwise, leave as an empty string
baseline$event1 <- ifelse(is.na(baseline$event1) & !is.na(baseline$event_d1) & baseline$aids == 1,"UNK",
                          baseline$event1)
baseline$event_d1 <- ifelse(is.na(baseline$event1) & !is.na(baseline$event_d1) & baseline$aids == 0,NA,
                            as.character(baseline$event_d1))
baseline$event_d1 <- as.Date(baseline$event_d1)


event_codes <- read.csv("event_codes.csv",sep=",",header=TRUE,stringsAsFactors=FALSE)
baseline$event1_name <- event_codes[match(baseline$event1,event_codes$event,nomatch=NA),"diseasedescription"]
baseline$event2_name <- event_codes[match(baseline$event2,event_codes$event,nomatch=NA),"diseasedescription"]
baseline$event3_name <- event_codes[match(baseline$event3,event_codes$event,nomatch=NA),"diseasedescription"]
baseline$event4_name <- event_codes[match(baseline$event4,event_codes$event,nomatch=NA),"diseasedescription"]
baseline$event5_name <- event_codes[match(baseline$event5,event_codes$event,nomatch=NA),"diseasedescription"]
baseline$event6_name <- event_codes[match(baseline$event6,event_codes$event,nomatch=NA),"diseasedescription"]
baseline$event7_name <- event_codes[match(baseline$event7,event_codes$event,nomatch=NA),"diseasedescription"]
baseline$event8_name <- event_codes[match(baseline$event8,event_codes$event,nomatch=NA),"diseasedescription"]


@


<<echo=FALSE, results=hide>>=
#----------------------------------#
#        Formatting data           #
#----------------------------------#
# Defining a unique subject identifier
baseline$unique_id <- paste(baseline$patient,baseline$cohort,sep=".")
hemoglobin$unique_id <- paste(hemoglobin$patient,hemoglobin$cohort,sep=".")
hemoglobin <- subset(hemoglobin, unique_id %in% baseline$unique_id)
origin$unique_id <- paste(origin$patient,origin$cohort,sep=".")
cd4.data$unique_id <- paste(cd4.data$patient,cd4.data$cohort,sep=".")
cd4.data <- subset(cd4.data, unique_id %in% baseline$unique_id)
vl.data$unique_id <- paste(vl.data$patient,vl.data$cohort,sep=".")
vl.data <- subset(vl.data, unique_id %in% baseline$unique_id)
ade.data$unique_id <- paste(ade.data$patient,ade.data$cohort,sep=".")
ade.data <- subset(ade.data, unique_id %in% baseline$unique_id)

# Defining the first year of HAART
baseline$end_of_first_year_haart <- baseline$haart_d + 365.25 # Adding 1 year to haart_d date

# Defining whether a subject had an ADE in the first year of HAART
baseline$ade_first_year_of_haart <- ifelse(baseline$event_d1 >= baseline$haart_d &
                                           baseline$event_d1 <= baseline$end_of_first_year_haart &
                                             !is.na(baseline$event_d1) & baseline$event_d1 <= baseline$clinfup_d & 
                                             baseline$event_d1 <= baseline$closeDate,1,0)
baseline$ade_first_year_of_haart.factor <- factor(baseline$ade_first_year_of_haart, levels=c(0,1), labels=c("No","Yes"))
baseline$death_first_year_of_haart <- ifelse(baseline$death_d <= baseline$end_of_first_year_haart &
                                             !is.na(baseline$death_d),1,0)
baseline$death_first_year_of_haart.factor <- factor(baseline$death_first_year_of_haart, levels=c(0,1), labels=c("No","Yes"))
baseline$ade_or_death_first_year_of_haart <- ifelse(baseline$ade_first_year_of_haart == 1 |
                                                 baseline$death_first_year_of_haart == 1, 1, 0)
baseline$ade_or_death_first_year_of_haart.factor <- factor(baseline$ade_or_death_first_year_of_haart, levels=c(0,1),
                                                           labels=c("No","Yes"))


# Creating factor variables with labels for numerical categorical variables
# Combine "Blood" with "Other or unknown"
baseline$risk <- ifelse(baseline$risk %in% c(4,5) & !is.na(baseline$risk),5,
                        ifelse(!is.na(baseline$risk),baseline$risk,NA))
baseline$risk.factor <- factor(baseline$risk, levels=c(3,2,1,5), labels=c("Heterosexual","IDU","MSM","Blood, Other or unknown"))
baseline$region.factor <- factor(baseline$region, levels=c(0,1,2), labels=c("Europe","Canada","US"))


# Sorting baseline by unique_id in order to add in Vanderbilt geographic origin data
baseline <- baseline[order(baseline$unique_id),]
which.row <- which(baseline$cohort == 'Vanderbilt')
baseline$origin_g[which.row] <- vu.geo.org$origin_g


baseline$origin_g.factor <- factor(baseline$origin_g, levels=c(0,11,12,20,30,51,52,99),
                                   labels=c("Europe","Northern Africa and Middle East",
                                            "Sub-Saharan Africa","Asia",
                                            "Australia, Oceania and New Zealand",
                                            "North America","Central and South America",
                                            "Unknown"))
baseline$origin_g_combined <- ifelse(baseline$origin_g == 30, 51,baseline$origin_g)
baseline$origin_g_combined.factor <- factor(baseline$origin_g_combined, levels=c(0,11,12,20,51,52,99),
                                            labels=c("Europe","Northern Africa and Middle East",
                                                     "Sub-Saharan Africa","Asia","North America",
                                                     "Central and South America", "Unknown"))
baseline$gender.factor <- factor(baseline$gender, levels=c(1,2), labels=c("Male","Female"))
baseline$stagec.factor <- factor(baseline$stagec, levels=c(0,1), labels=c("No","Yes"))
baseline$cohort.factor <- factor(baseline$cohort)

baseline$combined.factor <- factor(baseline$combined, levels=c(0,1,2,3,4,5,9),
                                   labels=c("White","Black","Hispanic","Asian","Arab","Indigenous","Unspecified"))
# Defining ART-CC's version of geographic origin from origin_g
baseline$artcc_origin_g <- ifelse(baseline$origin_g %in% c(0,30,51),"WEST",
                                  ifelse(baseline$origin_g == 11, "NAME",
                                  ifelse(baseline$origin_g == 12, "SSA",
                                  ifelse(baseline$origin_g == 20, "ASIA",
                                  ifelse(baseline$origin_g == 52, "LA",
                                  ifelse(baseline$origin_g == 99, "Unknown",NA))))))
baseline$artcc_origin_g.factor <- factor(baseline$artcc_origin_g, levels=c("WEST","NAME","SSA","ASIA","LA","Unknown"))

baseline$native.born <- ifelse(baseline$artcc_origin_g.factor == "WEST" & !is.na(baseline$artcc_origin_g.factor),1,
                                ifelse(!is.na(baseline$artcc_origin_g.factor) & baseline$artcc_origin_g.factor != "Unknown",0,
                                ifelse(!is.na(baseline$artcc_origin_g.factor) & baseline$artcc_origin_g.factor == "Unknown",2,NA)))
baseline$native.born.factor <- factor(baseline$native.born, levels=c(1,0,2), labels=c("Yes","No","Unknown"))

# Defining whether an initial ART regimen was PI-based, NNRTI-based, or Other.  Will create separate indicator
# variables to define this.
baseline$pi_based <- ifelse(baseline$totpi > 0,1,0)
baseline$pi_based.factor <- factor(baseline$pi_based, levels=c(0,1), labels=c("No","Yes"))
baseline$nnrti_based <- ifelse(baseline$totnnrti > 0,1,0)
baseline$nnrti_based.factor <- factor(baseline$nnrti_based, levels=c(0,1), labels=c("No","Yes"))
baseline$other_based <- ifelse(baseline$totpi == 0 & baseline$totnnrti == 0,1,0)
baseline$other_based.factor <- factor(baseline$other_based, levels=c(0,1), labels=c("No","Yes"))
rlt_based <- apply(subset(baseline, select=c(haart0_1,haart0_2,haart0_3,haart0_4,haart0_5,haart0_6,haart0_7,
                                             haart0_8,haart0_9)),1,FUN=function(y){
                                             as.numeric(any(y == "RLT"))})
baseline$rlt_based <- rlt_based
baseline$rlt_based.factor <- factor(baseline$rlt_based, levels=c(0,1),labels=c("No","Yes"))
@


<<echo=FALSE, results=hide>>=
# Defining L2FU
baseline$l2fu <- ifelse(baseline$clinfup_d < baseline$end_of_first_year_haart & (baseline$clinfup_d+365.25) < baseline$closeDate &
                        (((baseline$clinfup_d + 365.25) < baseline$death_d & !is.na(baseline$death_d)) |
                         is.na(baseline$death_d)),1,0)
baseline$l2fu.factor <- factor(baseline$l2fu, levels=c(0,1),labels=c("No","Yes"))
# Defining L2FU date
baseline$l2fu_d <- ifelse(baseline$l2fu == 1,as.character(baseline$clinfup_d),NA)
baseline$l2fu_d <- as.Date(baseline$l2fu_d)

baseline$l2fu <- ifelse(abs(baseline$l2fu_d - baseline$end_of_first_year_haart) < 1 & baseline$l2fu == 1,0,baseline$l2fu)
baseline$l2fu.factor <- factor(baseline$l2fu, levels=c(0,1),labels=c("No","Yes"))
baseline$l2fu_d <- ifelse(abs(baseline$l2fu_d - baseline$end_of_first_year_haart) < 1,NA,as.character(baseline$l2fu_d))
baseline$l2fu_d <- as.Date(baseline$l2fu_d)

baseline$european.clinic <- ifelse(baseline$region.factor == "Europe" & !is.na(baseline$region.factor),1,
                                   ifelse(!is.na(baseline$region.factor),0,NA))
baseline$european.clinic.factor <- factor(baseline$european.clinic, levels=c(0,1), labels=c("North American site","European site"))
@

<<echo=FALSE, results=hide>>=
# Need to find out who has baseline CD4 values > 31 days after or > 93 days before
baseline$time_to_baseline_cd4 <- with(baseline, haart_d - cd4_t0_d)
# Note:  There is an error in how cd4_t0 derived in the baseline data set so must reconstruct in the cd4.data data set
cd4.data$haart_d <- baseline[match(cd4.data$unique_id, baseline$unique_id, nomatch=NA),"haart_d"]
cd4.data$time_from_haart_to_cd4 <- with(cd4.data, cd4_d - haart_d)

do <- FALSE
if(do){
junk <- lapply(split(cd4.data,cd4.data$unique_id),
               FUN=function(y){
               y <- subset(y, !is.na(y[,'cd4_v']))
               y <- y[order(y[,"cd4_d"]),]
               if(!is.na(y[1,"haart_d"])){
                  before.haart <- ifelse(y[,"haart_d"] >= y[,"cd4_d"],1,0)
                  if(any(before.haart==1)){
                     min.time.before <- as.numeric(min(abs(y[which(before.haart==1),"time_from_haart_to_cd4"]),na.rm=TRUE))
                     which.row.time.before <- which(abs(y[which(before.haart==1),"time_from_haart_to_cd4"]) == min.time.before)[1]
                  } else {
                     min.time.before <- which.row.time.before <- NA
                  }
                  if(any(before.haart == 0)){
                     min.time.after <- as.numeric(min(abs(y[which(before.haart==0),"time_from_haart_to_cd4"]),na.rm=TRUE))
                     which.row.time.after <- which(abs(y[which(before.haart==0),"time_from_haart_to_cd4"]) == min.time.after)[1]
                  } else {
                     min.time.after <- which.row.time.after <- NA
                  }
                  if(min.time.before <= 93 & !is.na(min.time.before)){
                     cd4_t0_corrected <- y[before.haart==1,"cd4_v"][which.row.time.before]
                     cd4_t0_d_corrected <- y[before.haart==1,"cd4_d"][which.row.time.before]
                  } else if(min.time.after <= 31 & !is.na(min.time.after)){
                     cd4_t0_corrected <- y[before.haart==0,"cd4_v"][which.row.time.after]
                     cd4_t0_d_corrected <- y[before.haart==0,"cd4_d"][which.row.time.after]
                  } else {
                     cd4_t0_corrected <- cd4_t0_d_corrected <- NA
                  }
               } else {
                  cd4_t0_corrected <- cd4_t0_d_corrected <- NA
               }
               df <- data.frame("unique_id"=y[1,"unique_id"],
                                "cd4_t0_corrected"=cd4_t0_corrected,
                                "cd4_t0_d_corrected"=cd4_t0_d_corrected)
               return(df)})


junk.df <- do.call("rbind",junk)


baseline$cd4_t0_d_corrected <- junk.df[match(baseline$unique_id, junk.df$unique_id, nomatch=NA),"cd4_t0_d_corrected"]
baseline$cd4_t0_corrected <- junk.df[match(baseline$unique_id, junk.df$unique_id, nomatch=NA),"cd4_t0_corrected"]

baseline$cd4_diff <- with(baseline, cd4_t0 - cd4_t0_corrected)

save(baseline,file="updated_baseline.rda",compress=TRUE)
}

#-------------------------------------------------------------------------------#
vl.data$haart_d <- baseline[match(vl.data$unique_id, baseline$unique_id, nomatch=NA),"haart_d"]
vl.data$time_from_haart_to_rna <- with(vl.data, rna_d - haart_d)

do <- FALSE

if(do){
load("updated_baseline.rda")

junk <- lapply(split(vl.data,vl.data$unique_id),
               FUN=function(y){
               y <- subset(y, !is.na(rna_v))
               y <- y[order(y[,"rna_d"],y[,"rna_v"]),]

               if(!is.na(y[1,"haart_d"])){
                  unique_id <- y[1,"unique_id"]
                  before.haart <- ifelse(y[,"haart_d"] >= y[,"rna_d"] & abs(y[,"time_from_haart_to_rna"]) <= 93,1,
                                         ifelse(y[,"haart_d"] < y[,"rna_d"] & y[,"time_from_haart_to_rna"] <= 31,0,NA))
                  y <- y[!is.na(before.haart),]
                  before.haart <- before.haart[!is.na(before.haart)]
                  if(any(before.haart == 1)){
                     min.time.before <- as.numeric(min(abs(y[which(before.haart==1),"time_from_haart_to_rna"]),na.rm=TRUE))
                     which.row.time.before <- which(abs(as.numeric(y[which(before.haart==1),"time_from_haart_to_rna"])) ==
                     min.time.before)
                     if(length(which.row.time.before) > 1){
                        rna_t0_corrected <- max(y[which.row.time.before,"rna_v"],na.rm=TRUE)
                        if(rna_t0_corrected == -1){
                           rna_t0_corrected <- max(y[which.row.time.before,"rna_l"],na.rm=TRUE)
                           which.max.value <- which(y[which.row.time.before,"rna_v"]== -1)[1]
                        } else {
                           which.max.value <- which(y[which.row.time.before,"rna_v"]==rna_t0_corrected)
                        }
                        rna_t0_d_corrected <- y[which.row.time.before,"rna_d"][which.max.value]
                     } else {
                        rna_t0_corrected <- y[which.row.time.before,"rna_v"]
                        rna_t0_d_corrected <- y[which.row.time.before,"rna_d"]
                        if(rna_t0_corrected == -1) {
                           rna_t0_corrected <- y[which.row.time.before,"rna_l"]
                        }
                     }
                  } else if(any(before.haart == 0)){
                     min.time.after <- as.numeric(min(abs(y[which(before.haart==0),"time_from_haart_to_rna"]),na.rm=TRUE))
                     which.row.time.after <- which(abs(y[which(before.haart==0),"time_from_haart_to_rna"]) == min.time.after)
                     if(length(which.row.time.after) > 1){
                        rna_t0_corrected <- max(y[which.row.time.after,"rna_v"],na.rm=TRUE)
                        if(rna_t0_corrected == -1){
                           rna_t0_corrected <- max(y[which.row.time.after,"rna_l"],na.rm=TRUE)
                           which.max.value <- which(y[which.row.time.after,"rna_v"]== -1)[1]
                        } else {
                           which.max.value <- which(y[which.row.time.after,"rna_v"]==rna_t0_corrected)
                        }
                        rna_t0_d_corrected <- y[which.row.time.after,"rna_d"][which.max.value]
                     } else {
                        rna_t0_corrected <- y[which.row.time.after,"rna_v"]
                        rna_t0_d_corrected <- y[which.row.time.after,"rna_d"]
                        if(rna_t0_corrected == -1) {
                           rna_t0_corrected <- y[which.row.time.after,"rna_l"]
                        }
                     }
                  } else {
                     rna_t0_corrected <- rna_t0_d_corrected <- NA
                  }
               }
               if(rna_t0_corrected %in% c(Inf,-Inf)){
                  rna_t0_corrected <- NA
               }

               rna_t0_corrected <- ifelse(rna_t0_corrected < 500 & !is.na(rna_t0_corrected), 500, rna_t0_corrected)
               df <- data.frame("unique_id"=unique_id,
                                "rna_t0_corrected"=rna_t0_corrected,
                                "rna_t0_d_corrected"=rna_t0_d_corrected)
               return(df)})


junk.df <- do.call("rbind",junk)

baseline$rna_t0_d_corrected <- junk.df[match(baseline$unique_id, junk.df$unique_id, nomatch=NA),"rna_t0_d_corrected"]
baseline$rna_t0_corrected <- junk.df[match(baseline$unique_id, junk.df$unique_id, nomatch=NA),"rna_t0_corrected"]
baseline$rna_t0_corrected <- ifelse(is.na(baseline$rna_t0_corrected) & baseline$rna_t0 == 1,500,baseline$rna_t0_corrected)
baseline$rna_diff <- with(baseline, rna_t0 - rna_t0_corrected)

save(baseline,file="updated_baseline.rda",compress=TRUE)
}

hemoglobin$haart_d <- baseline[match(hemoglobin$unique_id, baseline$unique_id, nomatch=NA),"haart_d"]
hemoglobin$time_from_haart_to_hemoglobin <- with(hemoglobin, lab_d - haart_d)

do <- FALSE
if(do){
junk <- lapply(split(hemoglobin,hemoglobin$unique_id),
               FUN=function(y){
               y <- y[order(y[,"lab_d"]),]
               if(!is.na(y[1,"haart_d"])){
                  before.haart <- ifelse(y[,"haart_d"] >= y[,"lab_d"],1,0)
                  if(any(before.haart==1)){
                     min.time.before <- as.numeric(min(abs(y[which(before.haart==1),"time_from_haart_to_hemoglobin"]),na.rm=TRUE))
                     which.row.time.before <- which(abs(y[which(before.haart==1),"time_from_haart_to_hemoglobin"]) == min.time.before)[1]
                  } else {
                     min.time.before <- which.row.time.before <- NA
                  }
                  if(any(before.haart == 0)){
                     min.time.after <- as.numeric(min(abs(y[which(before.haart==0),"time_from_haart_to_hemoglobin"]),na.rm=TRUE))
                     which.row.time.after <- which(abs(y[which(before.haart==0),"time_from_haart_to_hemoglobin"]) == min.time.after)[1]
                  } else {
                     min.time.after <- which.row.time.after <- NA
                  }
                  #if(min.time.before <= 93 & !is.na(min.time.before)){
                  if(min.time.before <= 181 & !is.na(min.time.before)){
                     hemoglobin_t0_corrected <- y[before.haart==1,"lab_v"][which.row.time.before]
                     hemoglobin_t0_d_corrected <- y[before.haart==1,"lab_d"][which.row.time.before]
                  } else if(min.time.after <= 31 & !is.na(min.time.after)){
                     hemoglobin_t0_corrected <- y[before.haart==0,"lab_v"][which.row.time.after]
                     hemoglobin_t0_d_corrected <- y[before.haart==0,"lab_d"][which.row.time.after]
                  } else {
                     hemoglobin_t0_corrected <- hemoglobin_t0_d_corrected <- NA
                  }
               } else {
                  hemoglobin_t0_corrected <- hemoglobin_t0_d_corrected <- NA
               }
               df <- data.frame("unique_id"=y[1,"unique_id"],
                                "hemoglobin_t0_corrected"=hemoglobin_t0_corrected,
                                "hemoglobin_t0_d_corrected"=hemoglobin_t0_d_corrected)
               return(df)})


junk.df <- do.call("rbind",junk)


baseline$hemoglobin_t0_d_corrected <- junk.df[match(baseline$unique_id, junk.df$unique_id, nomatch=NA),"hemoglobin_t0_d_corrected"]
baseline$hemoglobin_t0_corrected <- junk.df[match(baseline$unique_id, junk.df$unique_id, nomatch=NA),"hemoglobin_t0_corrected"]

save(baseline,file="updated_baseline.rda",compress=TRUE)
}


load("updated_baseline.rda")

# Calculating corrected log10 viral load from corrected baseline viral load
baseline$lrna_t0_corrected <- log10(baseline$rna_t0_corrected)


# Still need to subset out those who have no known country of origin or race
baseline$diff. <- with(baseline, clinfup_d - haart_d) 
sub.baseline <- subset(baseline, (!is.na(origin_g) | !is.na(combined)) & haart_d < clinfup_d)
sub.baseline <- upData(sub.baseline, drop="recruit_d")

# Defining whether someone had an ADE in the first year of HAART that was not TUB
# Could have multiple ADEs as first ADE so have to check
ade1 <- apply(subset(sub.baseline, select=c(unique_id,event1,event_d1,
                                          ade_first_year_of_haart,end_of_first_year_haart)),1,FUN=function(y){
                                          if(y["ade_first_year_of_haart"]==1 &
                                             y["event1"] != "TUB" & !is.na(y["event1"])){
                                                ade1 <- 1
                                          } else {
                                             ade1 <- 0
                                          }
                                          })
sub.baseline <- cbind(sub.baseline, ade1)

ade2 <- apply(subset(sub.baseline, select=c(unique_id,event2,event_d2,ade1,clinfup_d,
                                          ade_first_year_of_haart,end_of_first_year_haart)),1,FUN=function(y){
                                          if(y["event_d2"] <= y["end_of_first_year_haart"] & 
                                             y["event_d2"] <= y["clinfup_d"] & y["ade1"] != 1 &
                                             y["event2"] != "TUB" & !is.na(y["event2"])){
                                                ade2 <- 1
                                          } else {
                                             ade2 <- 0
                                          }
                                          })
sub.baseline <- cbind(sub.baseline, ade2)

ade3 <- apply(subset(sub.baseline, select=c(unique_id,event3,event_d3,ade1,ade2,clinfup_d,
                                          ade_first_year_of_haart,end_of_first_year_haart)),1,FUN=function(y){
                                          if(y["event_d3"] <= y["end_of_first_year_haart"] &
                                             y["event_d3"] <= y["clinfup_d"] & 
                                          y["ade1"] != 1 & y["ade2"] != 1 &
                                             y["event3"] != "TUB" & !is.na(y["event3"])){
                                                ade3 <- 1
                                          } else {
                                             ade3 <- 0
                                          }
                                          })
sub.baseline <- cbind(sub.baseline, ade3)

ade4 <- apply(subset(sub.baseline, select=c(unique_id,event4,event_d4,ade1,ade2,ade3,clinfup_d,
                                          ade_first_year_of_haart,end_of_first_year_haart)),1,FUN=function(y){
                                          if(y["event_d4"] <= y["end_of_first_year_haart"] &
                                             y["event_d4"] <= y["clinfup_d"] & 
                                          y["ade1"] != 1 & y["ade2"] != 1 & y["ade3"] != 1 & 
                                             y["event4"] != "TUB" & !is.na(y["event4"])){
                                                ade4 <- 1
                                          } else {
                                             ade4 <- 0
                                          }
                                          })
sub.baseline <- cbind(sub.baseline, ade4)


# ADEs in first year of HAART, regardless if first or not
junk <- subset(sub.baseline, select=c(unique_id, event_d1, event_d2, event_d3, event_d4, event_d5, event_d6, event_d7, event_d8,
                                      end_of_first_year_haart))

junk.long <- reshape(junk, varying=list(c("event_d1","event_d2","event_d3","event_d4","event_d5","event_d6","event_d7","event_d8")),
                     timevar="ADE_number",times=c(1:8),v.names="Date",direction="long")
junk.long <- junk.long[,-5]
junk.long <- junk.long[order(junk.long$unique_id, junk.long$Date),]

junk2 <- subset(sub.baseline, select=c(unique_id,event1,event2,event3,event4,event5,event6,event7,event8))
junk.long2 <- reshape(junk2, varying=list(c("event1","event2","event3","event4","event5","event6","event7","event8")),
                      timevar="ADE_number",times=c(1:8),v.names="ADE",direction="long")
junk.long2 <- junk.long2[order(junk.long2$unique_id, junk.long2$ADE_number),]
junk.long2 <- junk.long2[,-4]

junk3 <- unique(subset(sub.baseline, select=c(unique_id,clinfup_d)))

stuff <- merge(junk.long,junk.long2, all=TRUE)
stuff$clinfup_d <- junk3[match(stuff$unique_id,junk3$unique_id,nomatch=NA),"clinfup_d"]

stuff2 <- subset(stuff, Date <= end_of_first_year_haart & Date <= clinfup_d & !is.na(Date))
stuff2 <- stuff2[order(stuff2$unique_id, stuff2$ADE_number),]
which.duplicated <- (which(duplicated(subset(stuff2,select=c(unique_id,Date,ADE)))))
stuff3 <- stuff2[-which.duplicated,]
tmp <- unique(subset(sub.baseline, unique_id %in% stuff3$unique_id, select=c(unique_id,artcc_origin_g.factor,native.born.factor, 
                                                                             combined.factor,closeDate,clinfup_d,dthfup_d,cenf_d,cennf_d,
                                                                             death_d,l2fu_d,end_of_first_year_haart,haart_d)))
stuff3$artcc_origin_g.factor <- tmp[match(stuff3$unique_id,tmp$unique_id,nomatch=NA),"artcc_origin_g.factor"]
stuff3$native.born.factor <- tmp[match(stuff3$unique_id,tmp$unique_id,nomatch=NA),"native.born.factor"]
stuff3$combined.factor <- tmp[match(stuff3$unique_id,tmp$unique_id,nomatch=NA),"combined.factor"]
stuff3$closeDate <- tmp[match(stuff3$unique_id,tmp$unique_id,nomatch=NA),"closeDate"]
stuff3$clinfup_d <- tmp[match(stuff3$unique_id,tmp$unique_id,nomatch=NA),"clinfup_d"]
stuff3$dthfup_d <- tmp[match(stuff3$unique_id,tmp$unique_id,nomatch=NA),"dthfup_d"]
stuff3$cenf_d <- tmp[match(stuff3$unique_id,tmp$unique_id,nomatch=NA),"cenf_d"]
stuff3$cennf_d <- tmp[match(stuff3$unique_id,tmp$unique_id,nomatch=NA),"cennf_d"]
stuff3$death_d <- tmp[match(stuff3$unique_id,tmp$unique_id,nomatch=NA),"death_d"]
stuff3$l2fu_d <- tmp[match(stuff3$unique_id,tmp$unique_id,nomatch=NA),"l2fu_d"]
stuff3$end_of_first_year_haart <- tmp[match(stuff3$unique_id,tmp$unique_id,nomatch=NA),"end_of_first_year_haart"]
stuff3$haart_d <- tmp[match(stuff3$unique_id,tmp$unique_id,nomatch=NA),"haart_d"]
# Defining incidence_fup
sub.baseline$incidence_end_of_fup <- apply(subset(sub.baseline, select=c(death_d,l2fu_d,end_of_first_year_haart,closeDate)),1,FUN=min,na.rm=TRUE)
sub.baseline$incidence_end_of_fup <- as.Date(sub.baseline$incidence_end_of_fup)
sub.baseline$incidence_fup <- as.numeric(with(sub.baseline, (incidence_end_of_fup - haart_d)/365.25))
tr.go <- tapply(sub.baseline$incidence_fup, INDEX=sub.baseline$artcc_origin_g.factor, FUN=sum, na.rm=TRUE)
tr.go.df <- data.frame("artcc_origin_g.factor"=c(levels(stuff3$artcc_origin_g.factor)),
                       "go.incidence_fup"=as.numeric(tr.go))
sub.baseline$go.incidence_fup <- tr.go.df[match(sub.baseline$artcc_origin_g.factor,tr.go.df$artcc_origin_g.factor,nomatch=NA),
                                           "go.incidence_fup"]
tr.nb <- tapply(sub.baseline$incidence_fup, INDEX=sub.baseline$native.born.factor, FUN=sum, na.rm=TRUE)
tr.nb.df <- data.frame("native.born.factor"=c(levels(sub.baseline$native.born.factor)),
                       "nb.incidence_fup"=as.numeric(tr.nb))
tr.nb.df <- rbind(tr.nb.df,data.frame('native.born.factor'='Combined', 
                                      'nb.incidence_fup'=sum(sub.baseline$incidence_fup[which(!is.na(sub.baseline$native.born.factor))])))

sub.baseline$nb.incidence_fup <- tr.nb.df[match(sub.baseline$native.born.factor,tr.nb.df$native.born.factor,nomatch=NA),
                                    "nb.incidence_fup"]
tr.race <- tapply(sub.baseline$incidence_fup, INDEX=sub.baseline$combined.factor, FUN=sum,na.rm=TRUE)
tr.race.df <- data.frame("combined.factor"=c(levels(sub.baseline$combined.factor)),
                       "race.incidence_fup"=as.numeric(tr.race))
sub.baseline$race.incidence_fup <- tr.race.df[match(sub.baseline$combined.factor,tr.race.df$combined.factor,nomatch=NA),
                                    "race.incidence_fup"]
incidence_fup <- unique(subset(sub.baseline, unique_id %in% stuff3$unique_id, select=c(unique_id,go.incidence_fup,nb.incidence_fup,
                                                                                       race.incidence_fup,incidence_end_of_fup,
                                                                                       incidence_fup)))
stuff3$incidence_fup <- incidence_fup[match(stuff3$unique_id,incidence_fup$unique_id,nomatch=NA),"incidence_fup"]
stuff3$incidence_end_of_fup <- incidence_fup[match(stuff3$unique_id,incidence_fup$unique_id,nomatch=NA),"incidence_end_of_fup"]
stuff3$go.incidence_fup <- incidence_fup[match(stuff3$unique_id,incidence_fup$unique_id,nomatch=NA),"go.incidence_fup"]
stuff3$nb.incidence_fup <- incidence_fup[match(stuff3$unique_id,incidence_fup$unique_id,nomatch=NA),"nb.incidence_fup"]
stuff3$race.incidence_fup <- incidence_fup[match(stuff3$unique_id,incidence_fup$unique_id,nomatch=NA),"race.incidence_fup"]
stuff3$origin_ade <- paste(stuff3$artcc_origin_g.factor,stuff3$ADE,sep=".")
stuff3$origin_ade.factor <- factor(stuff3$origin_ade, levels=sort(levels(interaction(stuff3$artcc_origin_g.factor,stuff3$ADE))))
stuff3$native.born.factor <- factor(stuff3$native.born, levels=c('Yes','No'))
stuff3$native_ade <- paste(stuff3$native.born.factor, stuff3$ADE,sep=".")
stuff3$native_ade.factor <- factor(stuff3$native_ade, levels=sort(levels(interaction(stuff3$native.born.factor,stuff3$ADE))))
stuff3$combined_ade <- paste(stuff3$combined.factor,stuff3$ADE,sep=".")
stuff3$combined_ade.factor <- factor(stuff3$combined_ade, levels=sort(levels(interaction(stuff3$combined.factor,stuff3$ADE))))

# None past event3

# Now need to define end of follow-up based on when the person had the non-TUB ADE in the first year of HAART
sub.baseline$non_tb_event_d <- ifelse(sub.baseline$ade1 == 1,
                                      as.character(sub.baseline$event_d1),
                                      ifelse(sub.baseline$ade2 == 1,
                                      as.character(sub.baseline$event_d2),
                                      ifelse(sub.baseline$ade3 == 1,
                                      as.character(sub.baseline$event_d3),NA)))
sub.baseline$non_tb_event_d <- as.Date(sub.baseline$non_tb_event_d, format="%Y-%m-%d")
sub.baseline$non_tb_first_year_of_haart <- ifelse(sub.baseline$ade1 == 1 |
                                                  sub.baseline$ade2 == 1 |
                                                  sub.baseline$ade3 == 1 |
                                                  sub.baseline$ade4 == 1,1,0)
sub.baseline$non_tb_first_year_of_haart.factor <- factor(sub.baseline$non_tb_first_year_of_haart,levels=c(0,1),
                                                         labels=c("No","Yes"))
sub.baseline$non_tb_or_death_first_year_of_haart <- ifelse(sub.baseline$non_tb_first_year_of_haart == 1 |
                                                           sub.baseline$death_first_year_of_haart == 1,1,0)
sub.baseline$non_tb_or_death_first_year_of_haart.factor <- factor(sub.baseline$non_tb_or_death_first_year_of_haart,
                                                                  levels=c(0,1), labels=c("No","Yes"))

@

<<echo=FALSE, results=tex>>=
# Table of ADEs by GO, NB, E/R
sub.baseline$ade <- ifelse(!is.na(sub.baseline$event_d1) & sub.baseline$event_d1 <= sub.baseline$end_of_first_year_haart &
                            sub.baseline$event_d1 >= sub.baseline$haart_d & sub.baseline$event_d1 <= sub.baseline$clinfup_d,1,0)

sub.baseline$ade.factor <- factor(sub.baseline$ade, levels=c(0,1), labels=c("No","Yes"))

summ.ade.go <- summary(artcc_origin_g.factor ~ ade.factor, data=sub.baseline, method="reverse", overall=TRUE)
summ.ade.go$labels <- "ADE"
latex(summ.ade.go, file="", caption="Distribution of ADEs across Geographic Origin.",exclude1=FALSE, long=TRUE,
      where="!h")

# Listing specific ADEs
# event in first year of haart (and not just the first ADE in the first year of HAART but all in the first year of HAART)
stuff3$ADE_name <- event_codes[match(stuff3$ADE, event_codes$event, nomatch=NA),"diseasedescription"]
stuff3$ADE_name_short <- event_codes[match(stuff3$ADE, event_codes$event, nomatch=NA),'diseasedescription.short']


cj <- lapply(split(subset(stuff3, select=c(unique_id, go.incidence_fup,artcc_origin_g.factor,ADE,origin_ade.factor)),
             stuff3$origin_ade.factor),FUN=function(y){
               if(dim(y)[1] > 0){
                  count <- dim(y)[1]
                  tot_person_yr <- y[1,"go.incidence_fup"]
                  incidence_rate <- count/tot_person_yr*1000
               } else {
                  count <- tot_person_yr <- incidence_rate <- 0
               }
               df <- data.frame("origin_ade"=y[1,"origin_ade.factor"],
                                "count"=count,
                                "incidence_rate"=incidence_rate,
                                "tot_person_yr"=tot_person_yr)

               return(df)})
cj.df <- do.call("rbind",cj)
which.rows <- which(is.na(cj.df$origin_ade))
cj.df$origin_ade[which.rows] <- rownames(cj.df)[which.rows]
cj.df$artcc_origin_g <- sapply(strsplit(as.character(cj.df$origin_ade), "\\."), function(x){x[[1]]})
cj.df$artcc_origin_g.factor <- factor(cj.df$artcc_origin_g, levels=c("WEST","NAME","SSA","ASIA","LA","Unknown"))
cj.df$ADE <- sapply(strsplit(as.character(cj.df$origin_ade), "\\."), function(x){x[[2]]})
cj.df$incidence_rate <- format(round(cj.df$incidence_rate,2),nsmall=2)

cj.df <- subset(cj.df, artcc_origin_g.factor != 'Unknown')
cj.df$ADE <- factor(cj.df$ADE)

# Getting cumulative incidence for geographic origin
col1 <- paste('Combined',sort(unique(stuff3$ADE)),sep='.')
col2 <- as.numeric(tapply(cj.df$count[which(cj.df$artcc_origin_g.factor != 'Unknown')],
                          INDEX=cj.df$ADE[which(cj.df$artcc_origin_g.factor != 'Unknown')], FUN=sum))
col2b <- as.numeric(tapply(cj.df$tot_person_yr[which(cj.df$artcc_origin_g.factor != 'Unknown')],
                           INDEX=cj.df$ADE[which(cj.df$artcc_origin_g.factor != 'Unknown')], FUN=sum))
col3 <- format(round(col2/col2b*1000,2),nsmall=2)
col4 <- col2b
col5 <- rep('Combined',times=length(col1))
col6 <- col5
col7 <- sort(unique(stuff3$ADE))

cj.df <- rbind(cj.df, data.frame('origin_ade'=col1,
                                 'count'=col2,
                                 'incidence_rate'=col3,
                                 'tot_person_yr'=col4,
                                 'artcc_origin_g'=col5,
                                 'artcc_origin_g.factor'=col6,
                                 'ADE'=col7))
# Need to order in decreasing order
tbl.order <- table(stuff3$ADE[which(stuff3$artcc_origin_g.factor != 'Unknown')])
tbl.df <- data.frame('ADE'=names(tbl.order),
                     'Total count'=as.numeric(tbl.order))
cj.df <- merge(cj.df, tbl.df, all=TRUE)
cj.df <- cj.df[order(cj.df$Total.count,cj.df$ADE,cj.df$artcc_origin_g.factor,decreasing=TRUE),]
cj.df <- subset(cj.df, !is.na(Total.count))
cj.df$ADE.short <- event_codes[match(cj.df$ADE,event_codes$event,nomatch=NA),'diseasedescription.short']


which.west <- which(cj.df$artcc_origin_g.factor == "WEST")
which.name <- which(cj.df$artcc_origin_g.factor == "NAME")
which.ssa <- which(cj.df$artcc_origin_g.factor == "SSA")
which.asia <- which(cj.df$artcc_origin_g.factor == "ASIA")
which.la <- which(cj.df$artcc_origin_g.factor == "LA")
which.unknown <- which(cj.df$artcc_origin_g.factor == "Unknown")
which.combined <- which(cj.df$artcc_origin_g.factor == 'Combined')

inc.go.specific <- data.frame("ADE"=cj.df$ADE.short[seq(1,dim(cj.df)[1],by=6)],
                              "West"=paste(cj.df[which.west,"incidence_rate"]," (",
                                           cj.df[which.west,"count"],
                                           ")",sep=""),
                              "NAME"=paste(cj.df[which.name,"incidence_rate"]," (",
                                           cj.df[which.name,"count"],
                                           ")",sep=""),
                              "SSA"=paste(cj.df[which.ssa,"incidence_rate"]," (",
                                           cj.df[which.ssa,"count"],
                                           ")",sep=""),
                              "Asia"=paste(cj.df[which.asia,"incidence_rate"]," (",
                                           cj.df[which.asia,"count"],
                                           ")",sep=""),
                              "LA"=paste(cj.df[which.la,"incidence_rate"]," (",
                                           cj.df[which.la,"count"],
                                           ")",sep=""),
                              'Combined'=paste(cj.df[which.combined,'incidence_rate'],' (',
                                               cj.df[which.combined,'count'],
                                               ')',sep=''))


latex(inc.go.specific, file="", rowname=NULL,insert.bottom = "N (WEST)=36367; N (NAME) = 1111; N (SSA) = 7846; N (ASIA) = 793; N (LA) = 2737; N (Unknown) = 17038.  Frequencies represent total number of events per geographic origin.  Total follow-up time:  WEST=33160.78 pr-yr; NAME=1043.56 pr-yr; SSA=7113.26 pr-yr; Asia=740.83 pr-yr; LA=2480.34 pr-yr; Unknown=16074.84 pr-yr.",caption="Incidence rate of ADEs in the first year of HAART across geographic origin listed as Incidence Rate (Frequency).",where="!h",col.just=c("l","r","r","r","r","r","r"),landscape=TRUE,size="small")


summ.ade.nb <- summary(native.born.factor ~ ade.factor, data=sub.baseline, method="reverse",overall=TRUE)
summ.ade.nb$labels <- "ADE"
latex(summ.ade.nb, file="", caption="Distribution of ADEs across Native Born status.",exclude1=FALSE, long=TRUE,
      where="!h")


# Listing specific ADEs
# event in first year of haart (and not just the first ADE in the first year of HAART but all in the first year of HAART)

cj <- lapply(split(subset(stuff3,select=c(unique_id, nb.incidence_fup,native.born.factor,ADE,native_ade.factor)),
             stuff3$native_ade.factor),FUN=function(y){
               if(dim(y)[1] > 0){
                  count <- dim(y)[1]
#                   tot_person_yr <- sum(y[,"incidence_fup"],na.rm=TRUE)
                  tot_person_yr <- y[1,"nb.incidence_fup"]
                  incidence_rate <- count/tot_person_yr*1000
               } else {
                  count <- tot_person_yr <- incidence_rate <- 0
               }
               df <- data.frame("native_ade"=y[1,"native_ade.factor"],
                                "count"=count,
                                "incidence_rate"=incidence_rate,
                                "tot_person_yr"=tot_person_yr)

               return(df)})
cj.df <- do.call("rbind",cj)
which.rows <- which(is.na(cj.df$native_ade))
cj.df$native_ade[which.rows] <- rownames(cj.df)[which.rows]
cj.df$native.born <- sapply(strsplit(as.character(cj.df$native_ade), "\\."), function(x){x[[1]]})

cj.df$ADE <- sapply(strsplit(as.character(cj.df$native_ade), "\\."), function(x){x[[2]]})
cj.df$ADE <- factor(cj.df$ADE)
cj.df$incidence_rate <- format(round(cj.df$incidence_rate,2),nsmall=2)

# Getting cumulative incidence for combined native born status
col1 <- paste('Combined',sort(unique(stuff3$ADE)),sep='.')
col2 <- as.numeric(tapply(cj.df$count, INDEX=cj.df$ADE, FUN=sum))
col2b <- as.numeric(tapply(cj.df$tot_person_yr, INDEX=cj.df$ADE, FUN=sum))
col3 <- format(round(col2/col2b*1000,2),nsmall=2)
col4 <- col2b
col5 <- rep('Combined',times=length(col1))
col6 <- sort(unique(stuff3$ADE))

cj.df <- rbind(cj.df, data.frame('native_ade'=col1,
                                 'count'=col2,
                                 'incidence_rate'=col3,
                                 'tot_person_yr'=col4,
                                 'native.born'=col5,
                                 'ADE'=col6))
# Need to order in decreasing order
tbl.order <- table(stuff3$ADE[which(!is.na(stuff3$native.born.factor))])
tbl.df <- data.frame('ADE'=names(tbl.order),
                     'Total count'=as.numeric(tbl.order))
cj.df <- merge(cj.df, tbl.df, all=TRUE)
cj.df <- cj.df[order(cj.df$Total.count,cj.df$ADE,cj.df$native.born,decreasing=TRUE),]
cj.df <- subset(cj.df, !is.na(Total.count))
cj.df$ADE.short <- event_codes[match(cj.df$ADE,event_codes$event,nomatch=NA),'diseasedescription.short']


which.yes <- which(cj.df$native.born == "Yes")
which.no <- which(cj.df$native.born == "No")
which.combined <- which(cj.df$native.born == 'Combined')


inc.nb.specific <- data.frame("ADE"=cj.df$ADE.short[seq(1,dim(cj.df)[1],by=3)],
                              "Yes"=paste(cj.df[which.yes,"incidence_rate"]," (",
                                           cj.df[which.yes,"count"],
                                           ")",sep=""),
                              "No"=paste(cj.df[which.no,"incidence_rate"]," (",
                                           cj.df[which.no,"count"],
                                           ")",sep=""),
                              'Combined'=paste(cj.df[which.combined,"incidence_rate"]," (",
                                           cj.df[which.combined,"count"],
                                           ")",sep=""))


latex(inc.nb.specific, file="", rowname=NULL,insert.bottom = "N (Yes)=36367; N (No) = 12487.  Frequencies represent total number of events per native born status. Total follow-up:  NB=33160.78 pr-yr; non-NB=11378 pr-yr.",caption="Incidence rate of ADEs in the first year of HAART across Native Born status listed as Incidence Rate (Frequency).",where="!h",col.just=c("l","r","r","r"),landscape=TRUE,size="small")


summ.ade.er <- summary(combined.factor ~ ade.factor, data=sub.baseline, method="reverse",overall=TRUE)
summ.ade.er$labels <- "ADE"
latex(summ.ade.er, file="", caption="Distribution of ADEs across Ethnicity/Race.",exclude1=FALSE,long=TRUE,
      where="!h",size="small")

# Listing specific ADEs
# event in first year of haart (and not just the first ADE in the first year of HAART but all in the first year of HAART)
cj <- lapply(split(subset(stuff3, select=c(unique_id, race.incidence_fup,combined.factor,ADE,combined_ade.factor)),
             stuff3$combined_ade.factor),FUN=function(y){
               if(dim(y)[1] > 0){
                  count <- dim(y)[1]
                  tot_person_yr <- y[1,"race.incidence_fup"]
                  incidence_rate <- count/tot_person_yr*1000
               } else {
                  count <- tot_person_yr <- incidence_rate <- 0
               }
               df <- data.frame("combined_ade"=y[1,"combined_ade.factor"],
                                "count"=count,
                                "incidence_rate"=incidence_rate,
                                "tot_person_yr"=tot_person_yr)

               return(df)})
cj.df <- do.call("rbind",cj)
which.rows <- which(is.na(cj.df$combined_ade))
cj.df$combined_ade[which.rows] <- rownames(cj.df)[which.rows]
cj.df$combined.factor <- sapply(strsplit(as.character(cj.df$combined_ade), "\\."), function(x){x[[1]]})

cj.df$ADE <- sapply(strsplit(as.character(cj.df$combined_ade), "\\."), function(x){x[[2]]})
cj.df$ADE <- factor(cj.df$ADE)
cj.df$incidence_rate <- format(round(cj.df$incidence_rate,2),nsmall=2)

which.white <- which(cj.df$combined.factor == "White")
which.black <- which(cj.df$combined.factor == "Black")
which.hispanic <- which(cj.df$combined.factor == "Hispanic")
which.asian <- which(cj.df$combined.factor == "Asian")
which.arab <- which(cj.df$combined.factor == "Arab")
which.indigenous <- which(cj.df$combined.factor == "Indigenous")
which.unspecified <- which(cj.df$combined.factor == "Unspecified")

inc.race.specific <- data.frame("ADE"=c("Bacterial pneumonia",
                                        "Candid. unspecified",
                                        "Coccidioidomycosis dissem or extrapulm",
                                        "CMV - other location",
                                        "Cryptococcosis, extrapulm.",
                                        "AIDS dementia complex",
                                        "Hodgkin",
                                        "Histoplasmosis, extrapulm.",
                                        "HSV ulcers or pneumonitis/esophagitis",
                                        "Cervical cancer",
                                        "Isosporiasis diarrhoea",
                                        "Kaposi's sarcoma",
                                        "Leishmaniasis, visceral",
                                        "Non-Hodgkin - primary brain",
                                        "MAC or Kansasii, extrapulm.",
                                        "Mycobacterial extrapulm, other type, specify",
                                        "Non-Hodgkin - unknonw/other histology",
                                        "Other",
                                        "PCP P jiroveci infection",
                                        "Progressive multifocal leucoencephalopathy",
                                        "CMV chorioretinitis",
                                        "Salmonella bacteriaemia (non-typhoid)",
                                        "Cryptosporidosis (duration > 1 month)",
                                        "Unidentified AIDS event",
                                        "Toxoplasmosis, brain",
                                        "Mycobacterial TB, location unspecified",
                                        "Unknown",
                                        "HIV wasting syndrome"),
                                "White"=paste(cj.df[which.white,"incidence_rate"]," (",
                                             cj.df[which.white,"count"],
                                             ")",sep=""),
                                "Black"=paste(cj.df[which.black,"incidence_rate"]," (",
                                              cj.df[which.black,"count"],
                                              ")",sep=""),
                                "Hispanic"=paste(cj.df[which.hispanic,"incidence_rate"]," (",
                                             cj.df[which.hispanic,"count"],
                                             ")",sep=""),
                                "Asian"=paste(cj.df[which.asian,"incidence_rate"]," (",
                                             cj.df[which.asian,"count"],
                                             ")",sep=""),
                                "Arab"=paste(cj.df[which.arab,"incidence_rate"]," (",
                                             cj.df[which.arab,"count"],
                                             ")",sep=""),
                                "Indigenous"=paste(cj.df[which.indigenous,"incidence_rate"]," (",
                                                   cj.df[which.indigenous,"count"],
                                                   ")",sep=""),
                                "Unspecified"=paste(cj.df[which.unspecified,"incidence_rate"]," (",
                                                    cj.df[which.unspecified,"count"],
                                                    ")",sep=""))


latex(inc.race.specific, file="", rowname=NULL,insert.bottom = "N (White)= 40509; N (Black) = 13200; N (Hispanic) = 3233; N (Asian) = 900; N (Arab) = 1048; N (Indigenous) = 312; N (Unspecified) = 6690. Frequencies represent total number of events per ethnicity/race.  Total follow-up:  White=37141.80 pr-yr; Black=12193.26 pr-yr; Hispanic=2946.63 pr-yr; Asian=845.13 pr-yr; Arab=986.02 pr-yr; Indigenous=273.78 pr-yr; Unspecified=6227.00 pr-yr.",caption="Incidence rate of ADEs in the first year of HAART across Race/ethnicity status listed as Incidence Rate (Frequency).",where="!h",col.just=c("l","r","r","r","r","r","r","r"),landscape=TRUE,size="small")


@

                        %--------------------------------------------------%
                        %     Comparisons across Region, GO, and E/R       %
                        %--------------------------------------------------%
<<echo=FALSE, results=tex>>=
summ.region <- summary(region.factor ~ gender.factor + cd4_t0_corrected + age + pi_based.factor +
                                       nnrti_based.factor + other_based.factor + rlt_based.factor + risk.factor + lrna_t0_corrected +
                                       stagec.factor + hemoglobin_t0_corrected + haartyr + l2fu.factor, data=sub.baseline,
                                       method="reverse",overall=TRUE,test=TRUE,digits=2)
summ.region$labels <- c("Sex","Baseline CD4","Baseline age","PI-based initial ART",
                        "NNRTI-based initial ART","Other-based initial ART","RLT-based initial ART","Probable route of infection",
                        "Baseline log viral load","ADE at ART initiation","Baseline hemoglobin",
                        "Yr of HAART initiation","Lost-to-followup")
latex(summ.region, file="", prtest="P",exclude1=FALSE,long=TRUE,digits=2,caption="Descriptive statistics for variables of interest across region of enrollment.",landscape=TRUE)
@


<<echo=FALSE, results=tex>>=
summ.go <- summary(artcc_origin_g.factor ~ gender.factor + region.factor + cd4_t0_corrected + age + pi_based.factor +
                                           nnrti_based.factor + other_based.factor + rlt_based.factor + lrna_t0_corrected +
                                           stagec.factor +  hemoglobin_t0_corrected + haartyr + l2fu.factor + risk.factor,
                   data=sub.baseline, method="reverse",overall=TRUE,test=TRUE,digits=2)
summ.go$labels <- c("Sex","Region of enrollment","Baseline CD4","Age at ART initiation","PI-based initial ART",
                    "NNRTI-based initial ART","Other-based initial ART","RLT-based initial ART","Baseline log viral load",
                    "ADE at ART initiation", "Baseline hemoglobin","Yr of HAART initiation","Lost-to-followup",
                    'Probable route of infection')

latex(summ.go, file="", prtest="P",exclude1=FALSE,long=TRUE,digits=2,caption="Descriptive statistics for variables of interest across geographic origin.",size="small",landscape=TRUE)

# Redoing above omitting those with 'Unknown' geographic origin
sub.baseline$artcc_origin_g.factor.full <- sub.baseline$artcc_origin_g.factor
sub.baseline$artcc_origin_g.factor <- factor(sub.baseline$artcc_origin_g.factor, levels=c("WEST","NAME","SSA","ASIA","LA"))
summ.go.reduced <- summary(artcc_origin_g.factor ~ gender.factor + region.factor + cd4_t0_corrected + age + pi_based.factor +
                                           nnrti_based.factor + other_based.factor + rlt_based.factor + lrna_t0_corrected +
                                           stagec.factor +  hemoglobin_t0_corrected + haartyr + l2fu.factor +
                                           risk.factor,
                   data=sub.baseline, method="reverse",overall=TRUE,test=TRUE,digits=2)
summ.go.reduced$labels <- c("Sex","Region of enrollment","Baseline CD4","Age at ART initiation","PI-based initial ART",
                            "NNRTI-based initial ART","Other-based initial ART","RLT-based initial ART","Baseline log viral load",
                            "ADE at ART initiation", "Baseline hemoglobin","Yr of HAART initiation","Lost-to-followup",
                            'Probable route of infection')

latex(summ.go.reduced, file="", prtest="P",exclude1=FALSE,long=TRUE,digits=2,caption="Descriptive statistics for variables of interest across geographic origin treating those with 'Unknown' geographic origin as missing.",size="small",landscape=TRUE)
@


<<echo=FALSE, results=tex>>=
summ.native <- summary(native.born.factor ~ gender.factor + cd4_t0_corrected + age + pi_based.factor +
                                       nnrti_based.factor + other_based.factor + rlt_based.factor + risk.factor + lrna_t0_corrected +
                                       stagec.factor + hemoglobin_t0_corrected + haartyr + l2fu.factor, data=sub.baseline,
                                       method="reverse",overall=TRUE,test=TRUE,digits=2)
summ.native$labels <- c("Sex","Baseline CD4","Baseline age","PI-based initial ART",
                        "NNRTI-based initial ART","Other-based initial ART","RLT-based initial ART","Probable route of infection",
                        "Baseline log viral load","ADE at ART initiation","Baseline hemoglobin",
                        "Yr of HAART initiation","Lost-to-followup")
latex(summ.native, file="", prtest="P",exclude1=FALSE,long=TRUE,digits=2,caption="Descriptive statistics for variables of interest across native born status.",landscape=TRUE)

# Redoing above table omitting those with 'Unknown' native born status:
sub.baseline$native.born.factor.full <- sub.baseline$native.born.factor
sub.baseline$native.born.factor <- factor(sub.baseline$native.born.factor, levels=c("Yes","No"))
summ.native.reduced <- summary(native.born.factor ~ gender.factor + cd4_t0_corrected + age + pi_based.factor +
                                       nnrti_based.factor + other_based.factor + rlt_based.factor + risk.factor + lrna_t0_corrected +
                                       stagec.factor + hemoglobin_t0_corrected + haartyr + l2fu.factor, data=sub.baseline,
                                       method="reverse",overall=TRUE,test=TRUE,digits=2)
summ.native.reduced$labels <- c("Sex","Baseline CD4","Baseline age","PI-based initial ART",
                                "NNRTI-based initial ART","Other-based initial ART","RLT-based initial ART","Probable route of infection",
                                "Baseline log viral load","ADE at ART initiation","Baseline hemoglobin",
                                "Yr of HAART initiation","Lost-to-followup")
latex(summ.native.reduced, file="", prtest="P",exclude1=FALSE,long=TRUE,digits=2,caption="Descriptive statistics for variables of interest across native born status treating those with 'Unknown' status as missing.",landscape=TRUE)
@


<<echo=FALSE, results=hide>>=
# Adding in baseline viral load information created by Bryan using vl-data-manip.R
load('baselineVLdata.Rda')
sub.baseline$rna_t0_corrected2 <- baseline.vl.data[match(sub.baseline$unique_id,baseline.vl.data$unique_id,nomatch=NA),'VL']

@

                        %-------------------------------------------------------------%
                        %     Unadjusted and Adjusted Analyses with ADE outcome       %
                        %-------------------------------------------------------------%

%---------------------------------------------------------------------------------%
%                                All comers analysis                              %
%---------------------------------------------------------------------------------%
<<echo=FALSE, results=hide>>=
#-------------------------------------#
#    Follow-up time in first year     #
#-------------------------------------#
sub.baseline$time_to_any_ade <- with(sub.baseline, event_d1 - haart_d)
end_of_follow_up_time_first_year <- apply(subset(sub.baseline,
                                          select=c(l2fu_d,event_d1,clinfup_d,death_d,end_of_first_year_haart,closeDate)),1,
                                          FUN=min,na.rm=TRUE)
sub.baseline$end_of_follow_up_time_first_year <- as.Date(end_of_follow_up_time_first_year)
sub.baseline$followup_time_first_year <- with(sub.baseline, as.numeric(end_of_follow_up_time_first_year - haart_d)/365.25)

end_of_follow_up <- apply(subset(sub.baseline,
                                 select=c(l2fu_d,event_d1,clinfup_d,death_d,closeDate)),1,
                          FUN=min,na.rm=TRUE)
sub.baseline$end_of_follow_up <- as.Date(end_of_follow_up)
sub.baseline$followup_time <- with(sub.baseline, as.numeric(end_of_follow_up - haart_d)/365.25)
sub.baseline$time_to_non_tb_ade <- with(sub.baseline, non_tb_event_d - haart_d)
end_of_follow_up_time_non_tb_first_year <- apply(subset(sub.baseline, select=c(l2fu_d,non_tb_event_d,death_d,clinfup_d,
                                                 end_of_first_year_haart,closeDate)),1,FUN=min,na.rm=TRUE)
sub.baseline$end_of_follow_up_non_tb_first_year <- as.Date(end_of_follow_up_time_non_tb_first_year)
sub.baseline$followup_time_non_tb_first_year <- with(sub.baseline, as.numeric(end_of_follow_up_non_tb_first_year -
                                                                   haart_d)/365.25)

end_of_followup_time_l2fu_first_year <- apply(subset(sub.baseline, select=c(event_d1,death_d,l2fu_d,end_of_first_year_haart,closeDate)),
                                                           1,FUN=min,na.rm=TRUE)
sub.baseline$end_of_followup_time_l2fu_first_year <- as.Date(end_of_followup_time_l2fu_first_year)
sub.baseline$followup_time_l2fu_first_year <- with(sub.baseline, as.numeric((end_of_followup_time_l2fu_first_year - haart_d))/365.25)

#------------------------------------------------------------------------------------------------------------#
#                                       Geographic Origin Analyses                                           #
#------------------------------------------------------------------------------------------------------------#
sub.baseline <- upData(sub.baseline, drop=c("event_d8","event8","lost_d"))
ddist <- datadist(sub.baseline)
options(datadist='ddist')

geo.baseline <- subset(sub.baseline, !is.na(artcc_origin_g.factor))
geo.baseline <- upData(geo.baseline, drop="event_d7")
ddist <- datadist(geo.baseline)
options(datadist='ddist')

geo.baseline.europe <- subset(geo.baseline, european.clinic.factor == "European site")
geo.baseline.europe <- upData(geo.baseline.europe, drop="rna_u")
ddist.europe <- datadist(geo.baseline.europe)

geo.baseline.north_american <- subset(geo.baseline, european.clinic.factor == "North American site")
geo.baseline.north_american <- upData(geo.baseline.north_american, drop=c("event_d5","event_d6","event6","event7","haart0_d",
                                                                          "event6_name","event7_name","event8_name"))
ddist.north_american <- datadist(geo.baseline.north_american)


geographic.origin.analysis <- model.fun(data=geo.baseline,cov="go",type="all")
geographic.origin.nontb.analysis <- model.fun(data=geo.baseline,cov="go",type="non_tb")
@


\begin{figure}
\caption{Probability of any ADE by baseline CD4.}
<<echo=FALSE, fig=TRUE>>=
mod.ade <- geographic.origin.analysis[[which(names(geographic.origin.analysis)=='any.ade.adj')]]
par(mfrow=c(2,2),mar=c(1,1,1,1))
print(plot(Predict(mod.ade, cd4_t0_corrected,fun=plogis),xlab='Baseline CD4',ylab='Probability of any ADE in first year',
      adj.subtitle=FALSE))


@
\end{figure}

\clearpage

\begin{figure}
\caption{Hazard of any ADE by baseline age.}
<<echo=FALSE, fig=TRUE>>=
print(plot(Predict(mod.ade, age,fun=exp),xlab='Baseline age',ylab='Hazard of any ADE in first year',
      adj.subtitle=FALSE))
@
\end{figure}

\clearpage

\begin{figure}
\caption{Hazard of any ADE by year of HAART start.}
<<echo=FALSE, fig=TRUE>>=
print(plot(Predict(mod.ade, haartyr,fun=exp),xlab='Year of HAART start',ylab='Hazard of any ADE in first year',
      adj.subtitle=FALSE))
@
\end{figure}


<<echo=FALSE, results=tex>>=
#----------------------------------------#
#              Printing data             #
#----------------------------------------#
# Unadjusted
df.tot.unadj <- cbind(geographic.origin.analysis[[1]],geographic.origin.analysis[[2]][,-1])

latex(df.tot.unadj, file="", rowname=NULL, cgroup=c("","Any ADE","ADE or Death"),n.cgroup=c(1,3,3),
      col.just=c("l","r","r","r||","r","r","r"),landscape=TRUE,size="small",caption="Unadjusted survival models investigating the association of time to any ADE or ADE/Death in the first year after HAART start and geographic origin.  Results are stratified by cohort.",where="!h",
      insert.bottom="Overall p-value for geographic origin:  Any ADE:  p=0.003; ADE/Death:  p = 0.12.")

# Adjusted
df.tot <- cbind(geographic.origin.analysis[[3]],geographic.origin.analysis[[24]],geographic.origin.analysis[[4]])

latex(df.tot, file="", rowname=NULL, cgroup=c("","Any ADE",'Death',"ADE or Death"),n.cgroup=c(1,3,3,3),
      col.just=c("l","r","r","r||","r","r","r||","r","r","r"),landscape=TRUE, size="small",caption="Adjusted survival models investigating the association of time to any ADE, Death, or ADE/Death in the first year after HAART start and geographic origin adjusting for a priori selected confounders (reference group for geographic origin is 'WEST' and for risk factors is 'Heterosexual').  Results are stratified by cohort.",where="!h",insert.bottom="Total non-linear p-value:  any ADE:  p < 0.001; Death: p = 0.004; ADE/Death:  p < 0.001.")

# L2FU - unadjusted
df.tot.l2fu.unadj <- geographic.origin.analysis[[11]]

latex(df.tot.l2fu.unadj, file="", rowname=NULL, landscape=TRUE,size="small",caption="Unadjusted survival model investigating the association of time to L2FU in the first year after HAART start and geographic origin.  Results are stratified by cohort.",where="!h",insert.bottom="Overall p-value for geographic origin:  p < 0.001.")

# L2FU - adjusted
df.tot.l2fu <- geographic.origin.analysis[[12]]

latex(df.tot.l2fu, file="", rowname=NULL, landscape=TRUE,size="small",caption="Adjusted survival model investigating the association of time to L2FU in the first year after HAART start and geographic origin adjusting for a priori selected confounders (reference group for geographic origin is 'WEST' and for risk factors is 'Heterosexual').  Results are stratified by cohort.",where="!h",insert.bottom="Total non-linear p-value: p < 0.001.")
@


<<echo=FALSE, results=tex>>=
#----------------------------------------#
#              Printing data             #
#----------------------------------------#
# Unadjusted
df.tot.nontb.unadj <- cbind(geographic.origin.nontb.analysis[[1]],geographic.origin.nontb.analysis[[2]][,-1])

latex(df.tot.nontb.unadj, file="", rowname=NULL, cgroup=c("","Any non-TB ADE","non-TB ADE or Death"),n.cgroup=c(1,3,3),
      col.just=c("l","r","r","r||","r","r","r"),landscape=TRUE,size="small",caption="Unadjusted survival models investigating the association of time to any non-TB ADE or non-TB ADE/Death in the first year after HAART start and geographic origin.  Results are stratified by cohort.",where="!h",
      insert.bottom="Overall p-value for geographic origin:  Any ADE:  p=0.20; ADE/Death:  p = 0.04.")

# Adjusted
df.nontb.tot <- cbind(geographic.origin.nontb.analysis[[3]],geographic.origin.nontb.analysis[[24]],geographic.origin.nontb.analysis[[4]])

latex(df.nontb.tot, file="", rowname=NULL, cgroup=c("","Any non-TB ADE","Death","non-TB ADE or Death"),n.cgroup=c(1,3,3,3),
      col.just=c("l","r","r","r||","r","r","r||","r","r","r"),landscape=TRUE,size="small",caption="Adjusted survival models investigating the association of time to any non-TB ADE, Death, or non-TB ADE/Death in the first year after HAART start and geographic origin adjusting for a priori selected confounders (reference group for geographic origin is 'WEST' and for risk factors is 'Heterosexual').  Results are stratified by cohort.",where="!h",insert.bottom="Total non-linear p-value:  any ADE:  p < 0.001; Death:  p = 0.004; ADE/Death:  p < 0.001.")
@


<<echo=FALSE, results=hide>>=
#------------------------------------------------------------------------------------------------------------#
#                                          Native Born Analyses                                              #
#------------------------------------------------------------------------------------------------------------#

nb.baseline <- subset(sub.baseline, !is.na(native.born.factor))
nb.baseline <- upData(nb.baseline, drop=c("recruit_d","event_d7"))
nb.ddist <- datadist(nb.baseline)
options(datadist='nb.ddist')

nb.baseline.europe <- subset(nb.baseline, european.clinic.factor == "European site")
nb.baseline.europe <- upData(nb.baseline.europe, drop="rna_u")
nb.ddist.europe <- datadist(nb.baseline.europe)

nb.baseline.north_american <- subset(nb.baseline, european.clinic.factor == "European site")
nb.baseline.north_american <- upData(nb.baseline.north_american, drop=c("event_d5","event_d6","event6","event7","haart0_d",
                                                                        "event6_name","event7_name","event8_name",
                                                                        "rna_u"))
nb.ddist.north_american <- datadist(nb.baseline.north_american)

native.born.analysis <- model.fun(data=nb.baseline, cov="nb",type="all")
native.born.nontb.analysis <- model.fun(data=nb.baseline, cov="nb",type="non_tb")
@

<<echo=FALSE, results=hide>>=
# Testing for interaction (ie., H0:  beta_europe = beta_north_american)
europe.any.ade.beta <- native.born.analysis[[16]]$coeff[1]
europe.any.ade.var <- diag(native.born.analysis[[16]]$var)[1]
europe.ade.or.dth.beta <- native.born.analysis[[17]]$coeff[1]
europe.ade.or.dth.var <- diag(native.born.analysis[[17]]$var)[1]

north_american.any.ade.beta <- native.born.analysis[[18]]$coeff[1]
north_american.any.ade.var <- diag(native.born.analysis[[18]]$var)[1]
north_american.ade.or.dth.beta <- native.born.analysis[[19]]$coeff[1]
north_american.ade.or.dth.var <- diag(native.born.analysis[[19]]$var)[1]

any.ade.test.statistic <- (north_american.any.ade.beta - europe.any.ade.beta - 0)/
                           (sqrt(north_american.any.ade.var + europe.any.ade.var))
any.ade.p <- 2*(1-pnorm(abs(any.ade.test.statistic)))

ade.or.dth.test.statistic <- (north_american.ade.or.dth.beta - europe.ade.or.dth.beta - 0)/
                              (sqrt(north_american.ade.or.dth.var + europe.ade.or.dth.var))
ade.or.dth.p <- 2*(1-pnorm(abs(ade.or.dth.test.statistic)))

@


<<echo=FALSE, results=tex>>=
#----------------------------------------#
#              Printing data             #
#----------------------------------------#
# Unadjusted
df.tot.nb.unadj <- c(native.born.analysis[[1]],native.born.analysis[[2]][-1],native.born.analysis[[27]][-1])
df.tot.nb.unadj <- t(as.matrix(df.tot.nb.unadj))
colnames(df.tot.nb.unadj) <- c("Covariate","HR","95\\% CI","P","HR","95\\% CI","P",'HR','95\\% CI','P')

latex(df.tot.nb.unadj, file="", rowname=NULL, cgroup=c("","Any ADE","ADE or Death",'Death'),n.cgroup=c(1,3,3,3),
      col.just=c("l","r","r","r||","r","r","r||",'r','r','r'),landscape=TRUE,size="small",caption="Unadjusted survival models investigating the association of time to any ADE, ADE/Death, or Death in the first year after HAART start and native born status (reference group for native born analysis is 'Native born').  Results are stratified by cohort.",where="!h",insert.bottom="Overall p-value for native born status:  Any ADE:  p < 0.001; ADE/Death:  p = 0.27; Death:  p < 0.001." )

# Adjusted
df.tot.nb <- cbind(native.born.analysis[[3]],native.born.analysis[[24]],native.born.analysis[[4]])

latex(df.tot.nb, file="", rowname=NULL, cgroup=c("","Any ADE","Death","ADE or Death"),n.cgroup=c(1,3,3,3),
      col.just=c("l","r","r","r||","r","r","r||","r","r","r"),landscape=TRUE,size="small",caption="Adjusted survival models investigating the association of time to any ADE, Death, or ADE/Death in the first year after HAART start and native born status adjusting for a priori selected confounders (reference group for native born status is 'Native born' and for risk factors is 'Heterosexual').  Results are stratified by cohort.",where="!h",insert.bottom="Total non-linear p-value:  any ADE:  p < 0.001; Death:  p = 0.004; ADE/Death:  p < 0.001.  Restricting the analysis to only heterosexuals gives HR, CI, p for native-born status 1) any ADE:  1.23 (1.08, 1.40), 0.002; 2) ADE/Death:  1.14, (1.01, 1.28), p=0.03; 3) Death:  0.87, (0.69, 1.11), p=0.26.")

# L2FU - unadjusted
df.tot.nb.l2fu.unadj <- native.born.analysis[[11]]

latex(df.tot.nb.l2fu.unadj, file="", rowname=NULL, landscape=TRUE,size="small",caption="Unadjusted survival model investigating the association of time to L2FU in the first year after HAART start and native born status.  Results are stratified by cohort.", where="!h", insert.bottom="Overall p-value for native born status:  p < 0.001.")

# L2FU - adjusted
df.tot.nb.l2fu <- native.born.analysis[[12]]

latex(df.tot.nb.l2fu, file="", rowname=NULL, landscape=TRUE,size="small",caption="Adjusted survival model investigating the association of time to L2FU in the first year after HAART start and native born status adjusting for a priori selected confounders (reference group for native born status is 'Native born' and for risk factors is 'Heterosexual').  Results are stratified by cohort.",where="!h",insert.bottom="Total non-linear p-value: p < 0.001.")

@


<<echo=FALSE, results=tex>>=
#----------------------------------------#
#              Printing data             #
#----------------------------------------#
# Unadjusted
df.tot.nb.nontb.unadj <- c(native.born.nontb.analysis[[1]],native.born.nontb.analysis[[2]][-1])
df.tot.nb.nontb.unadj <- t(as.matrix(df.tot.nb.nontb.unadj))
colnames(df.tot.nb.nontb.unadj) <- c("Covariate","HR","95\\% CI","P","HR","95\\% CI","P")

latex(df.tot.nb.nontb.unadj, file="", rowname=NULL, cgroup=c("","Any non-TB ADE","non-TB ADE or Death"),
      n.cgroup=c(1,3,3),
      col.just=c("l","r","r","r||","r","r","r"),landscape=TRUE,size="small",caption="Unadjusted survival models investigating the association of time to any non-TB ADE or non-TB ADE/Death in the first year after HAART start and native born status (reference group for native born analysis is 'Native born').  Results are stratified by cohort.",where="!h",insert.bottom="Overall p-value for native born status:  Any ADE:  p = 0.41; ADE/Death:  p = 0.16." )

# Adjusted
df.tot.nb.nontb <- cbind(native.born.nontb.analysis[[3]],native.born.nontb.analysis[[4]])

latex(df.tot.nb.nontb, file="", rowname=NULL, cgroup=c("","Any non-TB ADE","non-TB ADE or Death"),n.cgroup=c(1,3,3),
      col.just=c("l","r","r","r||","r","r","r"),landscape=TRUE,size="small",caption="Adjusted survival models investigating the association of time to any non-TB ADE or non-TB ADE/Death in the first year after HAART start and native born status adjusting for a priori selected confounders (reference group for native born status is 'Native born' and for risk factors is 'Heterosexual').  Results are stratified by cohort.",where="!h",insert.bottom="Total non-linear p-value:  any ADE:  p < 0.001; ADE/Death:  p < 0.001.")
@

\begin{figure}[!h]
\begin{center}
<<echo=FALSE, fig=TRUE>>=
par(mfrow=c(2,2),mar=c(4,4,1,1),oma=c(1,1,1,1),cex.lab=0.80)
# Need to create 3-level censoring variable:  0 = no event; 1 = ADE; 2 = Death w/o ADE
nb.baseline$new_censor <- ifelse(nb.baseline$ade_first_year_of_haart == 1,1,
                                 ifelse(nb.baseline$ade_first_year_of_haart == 0 &
                                        nb.baseline$death_first_year_of_haart == 1,2,
                                        ifelse(!is.na(nb.baseline$ade_first_year_of_haart),0,NA)))
nb.baseline$new_l2fu_censor <- ifelse(nb.baseline$l2fu == 1,1,
                                 ifelse(nb.baseline$l2fu == 0 &
                                        nb.baseline$death_first_year_of_haart == 1,2,
                                        ifelse(!is.na(nb.baseline$l2fu),0,NA)))

nb.baseline$native.born.factor <- factor(nb.baseline$native.born, levels=c(0,1),labels=c('Migrant','Non-migrant'))
nb.baseline$followup_time_first_year_months <- nb.baseline$followup_time_first_year*12
nb.baseline$followup_time_l2fu_first_year_months <- nb.baseline$followup_time_l2fu_first_year*12
f <- cuminc(ftime=nb.baseline$followup_time_first_year_months,
           fstatus=nb.baseline$ade_first_year_of_haart,
            group=nb.baseline$native.born.factor,
            cencode="0",rho=0)
fb <- cuminc(ftime=nb.baseline$followup_time_first_year_months,
           fstatus=nb.baseline$new_censor,
            group=nb.baseline$native.born.factor,
            cencode="0",rho=0)


plot(f, ylim=c(0,0.10), xlab="Months from ART initiation",
     ylab="Cumulative incidence of ADE",cex=0.8,lty=c(2,1),
     curvlab=c('Migrant','Non-migrant'),axes=FALSE,lwd=2)
axis(1,at=c(0,3,6,9,12),labels=c(0,3,6,9,12))
axis(2)
box(bty='l')
 
plot(fb, ylim=c(0,0.10), xlab="Months from ART initiation",
    ylab="Cumulative incidence of ADE",cex=0.75,lty=c(2,1,0,0),
    curvlab=c('Migrant','Non-migrant','',''),axes=FALSE,lwd=2)
axis(1,at=c(0,3,6,9,12),labels=c(0,3,6,9,12))
axis(2)
box(bty='l')

text(9.5,0.238,paste("ADE:  ",
                  "p=",round(summary(unadj.mod.ade.adedth)$coef[1,5],2),sep=""),
                  cex=0.85)
geo.baseline$followup_time_first_year_months <- geo.baseline$followup_time_first_year*12
geo.baseline$followup_time_l2fu_first_year_months <- geo.baseline$followup_time_l2fu_first_year*12
geo.baseline$new_censor <- ifelse(geo.baseline$ade_first_year_of_haart == 1,1,
                                 ifelse(geo.baseline$ade_first_year_of_haart == 0 &
                                        geo.baseline$death_first_year_of_haart == 1,2,
                                        ifelse(!is.na(geo.baseline$ade_first_year_of_haart),0,NA)))
geo.baseline$new_l2fu_censor <- ifelse(geo.baseline$l2fu == 1,1,
                                 ifelse(geo.baseline$l2fu == 0 &
                                        geo.baseline$death_first_year_of_haart == 1,2,
                                        ifelse(!is.na(geo.baseline$l2fu),0,NA)))
geo.baseline$new_l2fu_censor.factor <- factor(geo.baseline$new_l2fu_censor, levels=c(0,1,2),
                                              labels=c('Censored','L2FU','Died'))
geo.baseline$artcc_origin_g_b.factor <- factor(as.character(geo.baseline$artcc_origin_g.factor),
                                               levels=c('WEST','SSA','LA','NAME','ASIA'))
f2 <- cuminc(ftime=geo.baseline$followup_time_first_year_months,
            fstatus=geo.baseline$new_censor,
             group=geo.baseline$artcc_origin_g_b.factor,
             cencode='0',rho=0)


plot(f2, ylim=c(0,0.10), xlab='Months from ART initiation',
     ylab='Cumulative incidence of ADE',cex=0.75,lty=c(1,2,5,3,4,0,0,0,0,0),col=c(1,2,5,3,4,0,0,0,0,0),
#     curvlab=c('Non-migrant','NAME','SSA','Asia','LA','','','','',''),axes=FALSE)
     curvlab=c('Non-migrant','Sub-Saharan Africa','Latin America','North Africa/Middle East','Asia',
               '','','','',''),axes=FALSE,lwd=c(rep(2,times=3),3.0,2))
axis(1,at=c(0,3,6,9,12),labels=c(0,3,6,9,12))
axis(2)
box(bty='l')


\begin{figure}[height=2in,width=2in]
\caption{Kaplan Meier curve for probability of survival across migrant status/geographic origin.}
<<echo=FALSE, fig=TRUE>>=
#------------------------------------#
#      Time-to-event analysis        #
#------------------------------------#
# Kaplan-Meier curve

Srv.ade.new <- Surv(nb.baseline$followup_time_first_year,nb.baseline$ade_first_year_of_haart)
Srv.ade.new.months <- Surv(nb.baseline$followup_time_first_year_months,nb.baseline$ade_first_year_of_haart)


dd <- datadist(nb.baseline)
options(datadist='dd')
par(mfrow=c(2,2),mar=c(4,4,3,1))
plot(survfit(Srv.ade.new.months ~ nb.baseline$native.born.factor), mark.time=FALSE,
     lty=c(2,1), frame.plot=FALSE,ylim=c(0,0.10),fun=function(x)(1-x),
     xlab="Months from ART initiation",ylab="Probability of ADE",axes=FALSE,lwd=2)
axis(1,at=c(0,3,6,9,12),labels=c(0,3,6,9,12))
axis(2)
box(bty='l')
legend("topleft", legend=c("Migrant",
                           "Non-migrant"),
       lty=c(2,1),lwd=2,
       cex=0.90,bty="n")


logrank.test <- logrank(Srv.ade.new, nb.baseline$native.born.factor)
logrank.p <- 1-pchisq(logrank.test[1],df=1)
logrank.p <- ifelse(logrank.p < 0.001,"< 0.001",
                    ifelse(logrank.p > 0.001 & logrank.p < 0.01,round(logrank.p,3),round(logrank.p,2)))


Srv.ade.new2 <- Surv(geo.baseline$followup_time_first_year,geo.baseline$ade_first_year_of_haart)
Srv.ade.new2.months <- Surv(geo.baseline$followup_time_first_year_months,geo.baseline$ade_first_year_of_haart)

dd <- datadist(geo.baseline)
options(datadist='dd')

plot(survfit(Srv.ade.new2.months ~ geo.baseline$artcc_origin_g_b.factor), mark.time=FALSE,
     lty=c(1,1,2,2,3),      
     col=c(1,'grey70',1,'grey70',1),
     lwd=c(rep(2,times=3),2.0,2),
     frame.plot=FALSE,ylim=c(0.0,0.10),fun=function(x)(1-x),
     xlab="Months from ART initiation",ylab="Probability of ADE",axes=FALSE)
axis(1,at=c(0,3,6,9,12),labels=c(0,3,6,9,12))
axis(2)
box(bty='l')
legend("topleft", legend=c('Non-migrant','Sub-Saharan Africa','Latin America','North Africa/Middle East','Asia'),
       col=c(1,'grey70',1,'grey70',1),
       lwd=c(rep(2,times=3),2.0,2),
       lty=c(1,1,2,2,3),
       cex=0.90,bty="n")

logrank.test <- logrank(Srv.ade.new2, geo.baseline$artcc_origin_g.factor)
logrank.p <- 1-pchisq(logrank.test[1],df=1)
logrank.p <- ifelse(logrank.p < 0.001,"< 0.001",
                    ifelse(logrank.p > 0.001 & logrank.p < 0.01,round(logrank.p,3),round(logrank.p,2)))
@
\end{figure}


\begin{figure}[height=2in,width=2in]
\caption{Kaplan Meier curve for probability of survival across migrant status/geographic origin.}
<<echo=FALSE, fig=TRUE>>=
#------------------------------------#
#      Time-to-event analysis        #
#------------------------------------#
# Kaplan-Meier curve

Srv.dth <- Surv(nb.baseline$followup_time_first_year,nb.baseline$death_first_year_of_haart)
Srv.dth.months <- Surv(nb.baseline$followup_time_first_year_months,nb.baseline$death_first_year_of_haart)

dd <- datadist(nb.baseline)
options(datadist='dd')

plot(survfit(Srv.dth.months ~ nb.baseline$native.born.factor), mark.time=FALSE,
     lty=c(2,1), frame.plot=FALSE,ylim=c(0,0.05),fun=function(x)(1-x),
     xlab="Months from ART initiation",ylab="Probability of death",axes=FALSE,lwd=2)
axis(1,at=c(0,3,6,9,12),labels=c(0,3,6,9,12))
axis(2)
box(bty='l')
legend("topleft", legend=c("Migrant",
                               "Non-migrant"),
       lty=c(2,1),lwd=2,
       cex=0.90,bty="n")


logrank.test <- logrank(Srv.dth, nb.baseline$native.born.factor)
logrank.p <- 1-pchisq(logrank.test[1],df=1)
logrank.p <- ifelse(logrank.p < 0.001,"< 0.001",
                    ifelse(logrank.p > 0.001 & logrank.p < 0.01,round(logrank.p,3),round(logrank.p,2)))


Srv.dth2 <- Surv(geo.baseline$followup_time_first_year,geo.baseline$death_first_year_of_haart)
Srv.dth2.months <- Surv(geo.baseline$followup_time_first_year_months,geo.baseline$death_first_year_of_haart)

dd <- datadist(geo.baseline)
options(datadist='dd')

plot(survfit(Srv.dth2.months ~ geo.baseline$artcc_origin_g_b.factor), mark.time=FALSE,
       lty=c(1,1,2,2,3),      
     col=c(1,'grey70',1,'grey70',1),
     lwd=c(rep(2,times=3),2.0,2),
     frame.plot=FALSE,ylim=c(0.0,0.05),fun=function(x)(1-x),
     xlab="Months from ART initiation",ylab="Probability of death",axes=FALSE)
     
     
axis(1,at=c(0,3,6,9,12),labels=c(0,3,6,9,12))
axis(2)
box(bty='l')
legend("topleft", legend=c('Non-migrant','Sub-Saharan Africa','Latin America','North Africa/Middle East','Asia'),
       col=c(1,'grey70',1,'grey70',1),
       lwd=c(rep(2,times=3),2.0,2),
       lty=c(1,1,2,2,3),
       cex=0.90,bty="n")

logrank.test <- logrank(Srv.dth2, geo.baseline$artcc_origin_g.factor)
logrank.p <- 1-pchisq(logrank.test[1],df=1)
logrank.p <- ifelse(logrank.p < 0.001,"< 0.001",
                    ifelse(logrank.p > 0.001 & logrank.p < 0.01,round(logrank.p,3),round(logrank.p,2)))

text(x=0.30,y=0.05,paste("Logrank p-value = ",logrank.p,sep=""),cex=0.65)

@
\end{center}
\end{figure}


\begin{figure}
\begin{center}
<<echo=FALSE, fig=TRUE>>=
f.l2fu <- cuminc(ftime=nb.baseline$followup_time_l2fu_first_year_months,
                fstatus=nb.baseline$new_l2fu_censor,
                group=nb.baseline$native.born.factor,
                cencode='0',rho=0)
f2.l2fu <- cuminc(ftime=geo.baseline$followup_time_l2fu_first_year_months,
            fstatus=geo.baseline$new_l2fu_censor,
             group=geo.baseline$artcc_origin_g_b.factor,
             cencode='0',rho=0)

par(mfrow=c(2,2),mar=c(4,4,1,1))
plot(f.l2fu, ylim=c(0,0.20), xlab="Months from cART initiation",
     ylab="Cumulative incidence of LTFU",cex=0.8,lty=c(2,1,0,0),
     curvlab=c('Migrant','Non-migrant','',''),axes=FALSE)
axis(1,at=c(0,3,6,9,12),labels=c(0,3,6,9,12))
axis(2)
box(bty='l')

plot(f2.l2fu, ylim=c(0,0.20), xlab="Months from cART initiation",
     ylab="Cumulative incidence of LTFU",cex=0.8,lty=c(1,2,5,3,4,0,0,0,0,0),col=c(1,2,5,3,4,0,0,0,0,0),
     curvlab=c('Non-migrant','Sub-Saharan Africa','Latin America','North Africa/Middle East','Asia'),axes=FALSE)
axis(1,at=c(0,3,6,9,12),labels=c(0,3,6,9,12))
axis(2)
box(bty='l')

 covb <- col(matrix(0,nrow=dim(geo.baseline)[1],ncol=length(levels(geo.baseline$artcc_origin_g.factor)))) == as.integer(geo.baseline$artcc_origin_g.factor)
 colnames(covb) <- c('Non-migrant','NAME','SSA','ASIA','LA')
 covb <- covb[,-1]
 l2fu.mod.geo <- crr(ftime=geo.baseline$followup_time_l2fu_first_year,
                fstatus=geo.baseline$new_l2fu_censor,
                cov1=covb,
                failcode=1)
mod.geo <- crr(ftime=geo.baseline$followup_time_first_year,
               fstatus=geo.baseline$new_censor,
               cov1=covb,
               failcode=1)
mod.geo.dth <- crr(ftime=geo.baseline$followup_time_first_year,
                   fstatus=geo.baseline$death_first_year_of_haart,
                   cov1=covb,
                   failcode=1)
@
\end{center}
\end{figure}


%---------------------------------------------------------------------------------%
%                                Excluding MSM analysis                           %
%---------------------------------------------------------------------------------%
<<echo=FALSE, results=hide>>=
msm.baseline <- subset(sub.baseline, risk.factor != "MSM")
msm.baseline$risk.factor <- factor(msm.baseline$risk.factor, levels=c("Heterosexual","IDU","Blood, Other or unknown"))
ddist.msm <- datadist(msm.baseline)
options(datadist='ddist.msm')

#------------------------------------------------------------------------------------------------------------#
#                                       Geographic Origin Analyses                                           #
#------------------------------------------------------------------------------------------------------------#
msm.geo.baseline <- subset(msm.baseline, !is.na(artcc_origin_g.factor))
msm.geo.baseline <- upData(msm.geo.baseline, drop="event_d7")
ddist.msm.go <- datadist(msm.geo.baseline)
options(datadist='ddist.msm.go')

msm.geo.baseline.europe <- subset(msm.geo.baseline, european.clinic.factor == "European site")
msm.geo.baseline.europe <- upData(msm.geo.baseline.europe, drop="rna_u")
msm.geo.ddist.europe <- datadist(msm.geo.baseline.europe)

msm.geo.baseline.north_american <- subset(msm.geo.baseline, european.clinic.factor == "European site")
msm.geo.baseline.north_american <- upData(msm.geo.baseline.north_american, drop=c("event_d5","event_d6","event6","event7","haart0_d",
                                                                                  "event6_name","event7_name","event8_name",
                                                                                  "rna_u"))
msm.geo.ddist.north_american <- datadist(msm.geo.baseline.north_american)


msm.geographic.origin.analysis <- model.fun(data=msm.geo.baseline,cov="go",type="all")
msm.geographic.origin.nontb.analysis <- model.fun(data=msm.geo.baseline,cov="go",type="non_tb")
@

<<echo=FALSE, results=tex>>=
#----------------------------------------#
#              Printing data             #
#----------------------------------------#
# Unadjusted
msm.df.tot.unadj <- cbind(msm.geographic.origin.analysis[[1]],msm.geographic.origin.analysis[[2]][,-1])

latex(msm.df.tot.unadj, file="", rowname=NULL, cgroup=c("","Any ADE","ADE or Death"),n.cgroup=c(1,3,3),
      col.just=c("l","r","r","r||","r","r","r"),landscape=TRUE,size="small",caption="Unadjusted survival models investigating the association of time to any ADE or ADE/Death in the first year after HAART start and geographic origin, excluding MSM subjects (reference for geographic origin is 'WEST').  Results are stratified by cohort.",where="!h",
      insert.bottom="Overall p-value for geographic origin:  Any ADE:  p=0.02; ADE/Death:  p = 0.15.")

# Adjusted
msm.df.tot <- cbind(msm.geographic.origin.analysis[[3]],msm.geographic.origin.analysis[[4]])

latex(msm.df.tot, file="", rowname=NULL, cgroup=c("","Any ADE","ADE or Death"),n.cgroup=c(1,3,3),
      col.just=c("l","r","r","r||","r","r","r"),landscape=TRUE,size="small",caption="Adjusted survival models investigating the association of time to any ADE or ADE/Death in the first year after HAART start and geographic origin adjusting for a priori selected confounders and excluding MSM subjects (reference for geographic origin is 'WEST' and for risk factors is 'Heterosexual').  Results are stratified by cohort.",where="!h",insert.bottom="Total non-linear p-value:  any ADE: p < 0.001; ADE/Death:  p < 0.001.")

# L2FU - unadjusted
msm.df.tot.l2fu.unadj <- msm.geographic.origin.analysis[[11]]

latex(msm.df.tot.l2fu.unadj, file="", rowname=NULL, landscape=TRUE,size="small",caption="Unadjusted survival model investigating the association of time to L2FU in the first year after HAART start and geographic origin, excluding MSM subjects (reference for geographic origin is 'WEST').  Results are stratified by cohort.", where="!h", insert.bottom="Overall p-value for race:  p < 0.001.")

# L2FU - adjusted
msm.df.tot.l2fu <- msm.geographic.origin.analysis[[12]]

latex(msm.df.tot.l2fu, file="", rowname=NULL, landscape=TRUE,size="small",caption="Adjusted survival model investigating the association of time to L2FU in the first year after HAART start and geographic origin adjusting for a priori selected confounders and excluding MSM subjects (reference group for geographic origin is 'WEST' and for risk factors is 'Heterosexual').  Results are stratified by cohort.",where="!h", insert.bottom="Total non-linear p-value: p < 0.001.")


@


<<echo=FALSE, results=tex>>=
#----------------------------------------#
#              Printing data             #
#----------------------------------------#
# Unadjusted
msm.df.tot.nontb.unadj <- cbind(msm.geographic.origin.nontb.analysis[[1]],msm.geographic.origin.nontb.analysis[[2]][,-1])

latex(msm.df.tot.nontb.unadj, file="", rowname=NULL, cgroup=c("","Any non-TB ADE","non-TB ADE or Death"),
      n.cgroup=c(1,3,3),
      col.just=c("l","r","r","r||","r","r","r"),landscape=TRUE,size="small",caption="Unadjusted survival models investigating the association of time to any non-TB ADE or non-TB ADE/Death in the first year after HAART start and geographic origin, excluding MSM subjects (reference for geographic origin is 'WEST').  Results are stratified by cohort.",where="!h",
      insert.bottom="Overall p-value for geographic origin:  Any ADE:  p=0.30; ADE/Death:  p = 0.01.")

# Adjusted
msm.df.tot.nontb <- cbind(msm.geographic.origin.nontb.analysis[[3]],msm.geographic.origin.nontb.analysis[[4]])

latex(msm.df.tot.nontb, file="", rowname=NULL, cgroup=c("","Any non-TB ADE","non-TB ADE or Death"),n.cgroup=c(1,3,3),
      col.just=c("l","r","r","r||","r","r","r"),landscape=TRUE,size="small",caption="Adjusted survival models investigating the association of time to any non-TB ADE or non-TB ADE/Death in the first year after HAART start and geographic origin adjusting for a priori selected confounders and excluding MSM subjects (reference for geographic origin is 'WEST' and for risk factors is 'Heterosexual').  Results are stratified by cohort.",where="!h",insert.bottom="Total non-linear p-value:  any ADE: p < 0.001; ADE/Death:  p < 0.001.")
@


<<echo=FALSE, results=hide>>=
#------------------------------------------------------------------------------------------------------------#
#                                          Native Born Analyses                                              #
#------------------------------------------------------------------------------------------------------------#

msm.nb.baseline <- subset(msm.baseline, !is.na(native.born.factor))
msm.nb.baseline <- upData(msm.nb.baseline, drop=c("recruit_d","event_d7"))
msm.nb.ddist <- datadist(msm.nb.baseline)
options(datadist='msm.nb.ddist')


msm.nb.baseline.europe <- subset(msm.nb.baseline, european.clinic.factor == "European site")
msm.nb.baseline.europe <- upData(msm.nb.baseline.europe, drop="rna_u")
msm.nb.ddist.europe <- datadist(msm.nb.baseline.europe)

msm.nb.baseline.north_american <- subset(msm.nb.baseline, european.clinic.factor == "European site")
msm.nb.baseline.north_american <- upData(msm.nb.baseline.north_american, drop=c("event_d5","event_d6","event6","event7","haart0_d",
                                                                                "event6_name","event7_name","event8_name",
                                                                                "rna_u"))
msm.nb.ddist.north_american <- datadist(msm.nb.baseline.north_american)


msm.native.born.analysis <- model.fun(data=msm.nb.baseline, cov="nb",type="all")
msm.native.born.nontb.analysis <- model.fun(data=msm.nb.baseline, cov="nb",type="non_tb")
@


<<echo=FALSE, results=tex>>=
#----------------------------------------#
#              Printing data             #
#----------------------------------------#
# Unadjusted
msm.df.tot.nb.unadj <- c(msm.native.born.analysis[[1]],msm.native.born.analysis[[2]][-1])
msm.df.tot.nb.unadj <- t(as.matrix(msm.df.tot.nb.unadj))
colnames(msm.df.tot.nb.unadj) <- c("Covariate","HR","95\\% CI","P","HR","95\\% CI","P")

latex(msm.df.tot.nb.unadj, file="", rowname=NULL, cgroup=c("","Any ADE","ADE or Death"),n.cgroup=c(1,3,3),
      col.just=c("l","r","r","r||","r","r","r"),landscape=TRUE,size="small",caption="Unadjusted survival models investigating the association of time to any ADE or ADE/Death in the first year after HAART start and native born status, excluding MSM subjects (reference for native born status is 'Native Born').  Results are stratified by cohort.",where="!h",insert.bottom="Overall p-value for native born status:  any ADE:  p = 0.004; ADE/Death:  p = 0.89.")

# Adjusted
msm.df.tot.nb <- cbind(msm.native.born.analysis[[3]],msm.native.born.analysis[[4]])

latex(msm.df.tot.nb, file="", rowname=NULL, cgroup=c("","Any ADE","ADE or Death"),n.cgroup=c(1,3,3),
      col.just=c("l","r","r","r||","r","r","r"),landscape=TRUE,size="small",caption="Adjusted survival models investigating the association of time to any ADE or ADE/Death in the first year after HAART start and native born status adjusting for a priori selected confounders and excluding MSM subjects (reference for native born status is 'Native Born' and for risk factors is 'Heterosexual').  Results are stratified by cohort.",where="!h", insert.bottom="Total non-linear p-value:  any ADE:  p < 0.001; ADE/Death:  p < 0.001.")

# L2FU - unadjusted
msm.df.tot.nb.l2fu.unadj <- msm.native.born.analysis[[11]]

latex(msm.df.tot.nb.l2fu.unadj, file="", rowname=NULL, landscape=TRUE,size="small",caption="Unadjusted survival model investigating the association of time to L2FU in the first year after HAART start and native born status, excluding MSM subjects (reference for native born status is 'Yes').  Results are stratified by cohort.", where="!h", insert.bottom="Overall p-value for race:  p = 0.27.")

# L2FU - adjusted
msm.df.tot.nb.l2fu <- msm.native.born.analysis[[12]]

latex(msm.df.tot.nb.l2fu, file="", rowname=NULL, landscape=TRUE,size="small",caption="Adjusted survival model investigating the association of time to L2FU in the first year after HAART start and native born status adjusting for a priori selected confounders and excluding MSM subjects (reference group for native born status is 'Native born' and for risk factors is 'Heterosexual').  Results are stratified by cohort.",where="!h", insert.bottom="Total non-linear p-value: p < 0.001.")

@


<<echo=FALSE, results=tex>>=
#----------------------------------------#
#              Printing data             #
#----------------------------------------#
# Unadjusted
msm.df.tot.nb.nontb.unadj <- c(msm.native.born.nontb.analysis[[1]],msm.native.born.nontb.analysis[[2]][-1])
msm.df.tot.nb.nontb.unadj <- t(as.matrix(msm.df.tot.nb.nontb.unadj))
colnames(msm.df.tot.nb.nontb.unadj) <- c("Covariate","HR","95\\% CI","P","HR","95\\% CI","P")

latex(msm.df.tot.nb.nontb.unadj, file="", rowname=NULL, cgroup=c("","Any non-TB ADE","non-TB ADE or Death"),
      n.cgroup=c(1,3,3),
      col.just=c("l","r","r","r||","r","r","r"),landscape=TRUE,size="small",caption="Unadjusted survival models investigating the association of time to any non-TB ADE or non-TB ADE/Death in the first year after HAART start and native born status, excluding MSM subjects (reference for native born status is 'Native Born').  Results are stratified by cohort.",where="!h",insert.bottom="Overall p-value for native born status:  any ADE:  p = 0.63; ADE/Death:  p = 0.03.")

# Adjusted
msm.df.tot.nb.nontb <- cbind(msm.native.born.nontb.analysis[[3]],msm.native.born.nontb.analysis[[4]])

latex(msm.df.tot.nb.nontb, file="", rowname=NULL, cgroup=c("","Any non-TB ADE","non-TB ADE or Death"),n.cgroup=c(1,3,3),
      col.just=c("l","r","r","r||","r","r","r"),landscape=TRUE,size="small",caption="Adjusted survival models investigating the association of time to any non-TB ADE or non-TB ADE/Death in the first year after HAART start and native born status adjusting for a priori selected confounders and excluding MSM subjects (reference for native born status is 'Native Born' and for risk factors is 'Heterosexual').  Results are stratified by cohort.",where="!h", insert.bottom="Total non-linear p-value:  any ADE:  p < 0.001; ADE/Death:  p < 0.001.")
@


%---------------------------------------------------------------------------------%
%                                Excluding IDU analysis                           %
%---------------------------------------------------------------------------------%
<<echo=FALSE, results=hide>>=
idu.baseline <- subset(sub.baseline, risk.factor != "IDU")
idu.baseline$risk.factor <- factor(idu.baseline$risk.factor, levels=c("Heterosexual","MSM","Blood, Other or unknown"))
ddist.idu <- datadist(idu.baseline)
options(datadist='ddist.idu')

#------------------------------------------------------------------------------------------------------------#
#                                       Geographic Origin Analyses                                           #
#------------------------------------------------------------------------------------------------------------#
idu.geo.baseline <- subset(idu.baseline, !is.na(artcc_origin_g.factor))
idu.geo.baseline <- upData(idu.geo.baseline, drop="event_d7")
ddist.idu.go <- datadist(idu.geo.baseline)
options(datadist='ddist.idu.go')

idu.geo.baseline.europe <- subset(idu.geo.baseline, european.clinic.factor == "European site")
idu.geo.baseline.europe <- upData(idu.geo.baseline.europe, drop="rna_u")
idu.geo.ddist.europe <- datadist(idu.geo.baseline.europe)

idu.geo.baseline.north_american <- subset(idu.geo.baseline, european.clinic.factor == "European site")
idu.geo.baseline.north_american <- upData(idu.geo.baseline.north_american, drop=c("event_d5","event_d6","event6","event7","haart0_d",
                                                                          "event6_name","event7_name","event8_name","rna_u"))
idu.geo.ddist.north_american <- datadist(idu.geo.baseline.north_american)

idu.geographic.origin.analysis <- model.fun(data=idu.geo.baseline,cov="go",type="all")
idu.geographic.origin.nontb.analysis <- model.fun(data=idu.geo.baseline,cov="go",type="non_tb")
@

<<echo=FALSE, results=tex>>=
#----------------------------------------#
#              Printing data             #
#----------------------------------------#
# Unadjusted
idu.df.tot.unadj <- cbind(idu.geographic.origin.analysis[[1]],idu.geographic.origin.analysis[[2]][,-1])

latex(idu.df.tot.unadj, file="", rowname=NULL, cgroup=c("","Any ADE","ADE or Death"),n.cgroup=c(1,3,3),
      col.just=c("l","r","r","r||","r","r","r"),landscape=TRUE,size="small",caption="Unadjusted survival models investigating the association of time to any ADE or ADE/Death in the first year after HAART start and geographic origin, excluding IDU subjects (reference for geographic origin is 'WEST').  Results are stratified by cohort.",
      where="!h",insert.bottom="Overall p-value for geographic origin:  Any ADE:  p < 0.001; ADE/Death:  p = 0.10.")

# Adjusted
idu.df.tot <- cbind(idu.geographic.origin.analysis[[3]],idu.geographic.origin.analysis[[4]])

latex(idu.df.tot, file="", rowname=NULL, cgroup=c("","Any ADE","ADE or Death"),n.cgroup=c(1,3,3),
      col.just=c("l","r","r","r||","r","r","r"),landscape=TRUE,size="small",caption="Adjusted survival models investigating the association of time to any ADE or ADE/Death in the first year after HAART start and geographic origin adjusting for a priori selected confounders and excluding IDU subjects (reference for geographic origin is 'WEST' and for risk factors is 'Heterosexual').  Results are stratified by cohort.",where="!h",insert.bottom="Total non-linear p-value:  any ADE: p < 0.001; ADE/Death:  p < 0.001.")

# L2FU - unadjusted
idu.df.tot.l2fu.unadj <- idu.geographic.origin.analysis[[11]]

latex(idu.df.tot.l2fu.unadj, file="", rowname=NULL, landscape=TRUE,size="small",caption="Unadjusted survival model investigating the association of time to L2FU in the first year after HAART start and geographic analysis, excluding IDU subjects (reference for geographic origin is 'WEST').  Results are stratified by cohort.", where="!h", insert.bottom="Overall p-value for race:  p < 0.001.")

# L2FU - adjusted
idu.df.tot.l2fu <- idu.geographic.origin.analysis[[12]]

latex(idu.df.tot.l2fu, file="", rowname=NULL, landscape=TRUE,size="small",caption="Adjusted survival model investigating the association of time to L2FU in the first year after HAART start and geographic origin adjusting for a priori selected confounders and excluding IDU subjects (reference group for geographic origin is 'WEST' and for risk factors is 'Heterosexual').  Results are stratified by cohort.",where="!h", insert.bottom="Total non-linear p-value: p < 0.001.")


@


<<echo=FALSE, results=tex>>=
#----------------------------------------#
#              Printing data             #
#----------------------------------------#
# Unadjusted
idu.df.tot.unadj.nontb <- cbind(idu.geographic.origin.nontb.analysis[[1]],idu.geographic.origin.nontb.analysis[[2]][,-1])

latex(idu.df.tot.unadj.nontb, file="", rowname=NULL, cgroup=c("","Any non-TB ADE","non-TB ADE or Death"),
      n.cgroup=c(1,3,3),
      col.just=c("l","r","r","r||","r","r","r"),landscape=TRUE,size="small",caption="Unadjusted survival models investigating the association of time to any non-TB ADE or non-TB ADE/Death in the first year after HAART start and geographic origin, excluding IDU subjects (reference for geographic origin is 'WEST').  Results are stratified by cohort.",
      where="!h",insert.bottom="Overall p-value for geographic origin:  Any ADE:  p=0.07; ADE/Death:  p = 0.07.")

# Adjusted
idu.df.tot.nontb <- cbind(idu.geographic.origin.nontb.analysis[[3]],idu.geographic.origin.nontb.analysis[[4]])

latex(idu.df.tot.nontb, file="", rowname=NULL, cgroup=c("","Any non-TB ADE","non-TB ADE or Death"),n.cgroup=c(1,3,3),
      col.just=c("l","r","r","r||","r","r","r"),landscape=TRUE,size="small",caption="Adjusted survival models investigating the association of time to any non-TB ADE or non-TB ADE/Death in the first year after HAART start and geographic origin adjusting for a priori selected confounders and excluding IDU subjects (reference for geographic origin is 'WEST' and for risk factors is 'Heterosexual').  Results are stratified by cohort.",where="!h",insert.bottom="Total non-linear p-value:  any ADE: p < 0.001; ADE/Death:  p < 0.001.")
@


<<echo=FALSE, results=hide>>=
#------------------------------------------------------------------------------------------------------------#
#                                          Native Born Analyses                                              #
#------------------------------------------------------------------------------------------------------------#

idu.nb.baseline <- subset(idu.baseline, !is.na(native.born.factor))
idu.nb.baseline <- upData(idu.nb.baseline, drop=c("event_d7","event_d8"))
idu.nb.ddist <- datadist(idu.nb.baseline)
options(datadist='idu.nb.ddist')

idu.nb.baseline.europe <- subset(idu.nb.baseline, european.clinic.factor == "European site")
idu.nb.baseline.europe <- upData(idu.nb.baseline.europe, drop="rna_u")
idu.nb.ddist.europe <- datadist(idu.nb.baseline.europe)

idu.nb.baseline.north_american <- subset(idu.nb.baseline, european.clinic.factor == "European site")
idu.nb.baseline.north_american <- upData(idu.nb.baseline.north_american, drop=c("event_d5","event_d6","event6","event7","haart0_d",
                                                                          "event6_name","event7_name","event8_name","rna_u"))
idu.nb.ddist.north_american <- datadist(idu.nb.baseline.north_american)


idu.native.born.analysis <- model.fun(data=idu.nb.baseline, cov="nb",type="all")
idu.native.born.nontb.analysis <- model.fun(data=idu.nb.baseline, cov="nb",type="non_tb") ## Need Bryan's input on this
@


<<echo=FALSE, results=tex>>=
#----------------------------------------#
#              Printing data             #
#----------------------------------------#
# Unadjusted
idu.df.tot.nb.unadj <- c(idu.native.born.analysis[[1]],idu.native.born.analysis[[2]][-1])
idu.df.tot.nb.unadj <- t(as.matrix(idu.df.tot.nb.unadj))
colnames(idu.df.tot.nb.unadj) <- c("Covariate","HR","95\\% CI","P","HR","95\\% CI","P")

latex(idu.df.tot.nb.unadj, file="", rowname=NULL, cgroup=c("","Any ADE","ADE or Death"),n.cgroup=c(1,3,3),
      col.just=c("l","r","r","r||","r","r","r"),landscape=TRUE,size="small",caption="Unadjusted survival models investigating the association of time to any ADE or ADE/Death in the first year after HAART start and native born status, excluding IDU subjects (reference for native born status is 'Native Born').  Results are stratified by cohort.",where="!h",insert.bottom="Overall p-value for native born status:  any ADE:  p < 0.001; ADE/Death:  p = 0.12.")

# Adjusted
idu.df.tot.nb <- cbind(idu.native.born.analysis[[3]],idu.native.born.analysis[[4]])

latex(idu.df.tot.nb, file="", rowname=NULL, cgroup=c("","Any ADE","ADE or Death"),n.cgroup=c(1,3,3),
      col.just=c("l","r","r","r||","r","r","r"),landscape=TRUE,size="small",caption="Adjusted survival models investigating the association of time to any ADE or ADE/Death in the first year after HAART start and native born status adjusting for a priori selected confounders and excluding IDU subjects (reference for native born status is 'Native Born' and for risk factors is 'Heterosexual').  Results are stratified by cohort.",where="!h",insert.bottom="Total non-linear p-value:  any ADE:  p < 0.001; ADE/Death:  p < 0.001.")

# L2FU - unadjusted
idu.df.tot.nb.l2fu.unadj <- idu.native.born.analysis[[11]]

latex(idu.df.tot.nb.l2fu.unadj, file="", rowname=NULL, landscape=TRUE,size="small",caption="Unadjusted survival model investigating the association of time to L2FU in the first year after HAART start and native born status, excluding IDU subjects (reference for native born status  is 'Yes').  Results are stratified by cohort.", where="!h", insert.bottom="Overall p-value for race:  p < 0.001.")

# L2FU - adjusted
idu.df.tot.nb.l2fu <- idu.native.born.analysis[[12]]

latex(idu.df.tot.nb.l2fu, file="", rowname=NULL, landscape=TRUE,size="small",caption="Adjusted survival model investigating the association of time to L2FU in the first year after HAART start and native born status adjusting for a priori selected confounders and excluding IDU subjects (reference group for native born status is 'Native born' and for risk factors is 'Heterosexual').  Results are stratified by cohort.",where="!h", insert.bottom="Total non-linear p-value: p < 0.001.")

@

<<echo=FALSE, results=tex>>=
#----------------------------------------#
#              Printing data             #
#----------------------------------------#
# Unadjusted
idu.df.tot.nb.unadj.nontb <- t(as.matrix(c(idu.native.born.nontb.analysis[[1]],idu.native.born.nontb.analysis[[2]][,-1])))
colnames(idu.df.tot.nb.unadj.nontb) <- c("Covariate",rep(c("Hazard Ratio","95\\% CI","P"),times=2))

latex(idu.df.tot.nb.unadj.nontb, file="", rowname=NULL, cgroup=c("","Any non-TB ADE","non-TB ADE or Death"),
      n.cgroup=c(1,3,3),
      col.just=c("l","r","r","r||","r","r","r"),landscape=TRUE,size="small",caption="Unadjusted survival models investigating the association of time to any non-TB ADE or non-TB ADE/Death in the first year after HAART start and native born status, excluding IDU subjects (reference for native born status is 'Yes').  Results are stratified by cohort.",
      where="!h",insert.bottom="Overall p-value for native born status:  Any ADE:  p=0.33; ADE/Death:  p=0.33.")

# Adjusted
idu.df.tot.nb.nontb <- cbind(idu.native.born.nontb.analysis[[3]],idu.native.born.nontb.analysis[[4]])

latex(idu.df.tot.nb.nontb, file="", rowname=NULL, cgroup=c("","Any non-TB ADE","non-TB ADE or Death"),n.cgroup=c(1,3,3),
      col.just=c("l","r","r","r||","r","r","r"),landscape=TRUE,size="small",caption="Adjusted survival models investigating the association of time to any non-TB ADE or non-TB ADE/Death in the first year after HAART start and native born status adjusting for a priori selected confounders and excluding IDU subjects (reference for native born status is 'Yes' and for risk factors is 'Heterosexual').  Results are stratified by cohort.",where="!h",insert.bottom="Total non-linear p-value:  any ADE: p < 0.001; ADE/Death:  p < 0.001.")
@


%---------------------------------------------------------------------------------%
%                              Including only Heterosexuals                       %
%                      Based on comments to manuscript from ART-CC community      %
%---------------------------------------------------------------------------------%
<<echo=FALSE, results=hide>>=
hetero.baseline <- subset(sub.baseline, risk.factor == "Heterosexual")
hetero.baseline <- upData(hetero.baseline, drop='event_d7')
ddist.hetero <- datadist(hetero.baseline)
options(datadist='ddist.hetero')

#------------------------------------------------------------------------------------------------------------#
#                                          Native Born Analyses                                              #
#------------------------------------------------------------------------------------------------------------#

hetero.nb.baseline <- subset(hetero.baseline, !is.na(native.born.factor))
hetero.nb.baseline <- upData(hetero.nb.baseline, drop=c("recruit_d"))
hetero.nb.ddist <- datadist(hetero.nb.baseline)
options(datadist='hetero.nb.ddist')


hetero.nb.baseline.europe <- subset(hetero.nb.baseline, european.clinic.factor == "European site")
hetero.nb.baseline.europe <- upData(hetero.nb.baseline.europe, drop="rna_u")
hetero.nb.ddist.europe <- datadist(hetero.nb.baseline.europe)

hetero.nb.baseline.north_american <- subset(hetero.nb.baseline, european.clinic.factor == "European site")
hetero.nb.baseline.north_american <- upData(hetero.nb.baseline.north_american, drop=c("event_d5","event_d6","event6","event7","haart0_d",
                                                                                "event6_name","event7_name","event8_name",
                                                                                "rna_u"))
hetero.nb.ddist.north_american <- datadist(hetero.nb.baseline.north_american)


data <- hetero.nb.baseline
var. <- "native.born.factor"

ddist <- datadist(data)

   # For sub-group analyses in adjusted models
   data.europe <- subset(data, european.clinic.factor == "European site")
   data.north_american <- subset(data, european.clinic.factor == "North American site")

   data.europe <- upData(data.europe, drop=c("rna_u","event_d7"))
   ddist.europe <- datadist(data.europe)


   data.north_american <- upData(data.north_american, drop=c("event_d5","event_d6","event6","event7","haart0_d",
                                                             "event6_name","event7_name","event8_name"))
   ddist.north_american <- datadist(data.north_american)

   #-----------------------------#
   #      Time to any ADE        #
   #     in the first year       #
   #-----------------------------#
   Srv.ade <- Surv(data$followup_time_first_year,data$ade_first_year_of_haart)
   Srv.ade.europe <- Surv(data.europe$followup_time_first_year, data.europe$ade_first_year_of_haart)
   Srv.ade.north_american <- Surv(data.north_american$followup_time_first_year, data.north_american$ade_first_year_of_haart)

   #-----------------------------#
   #    Time to ADE or death     #
   #     in the first year       #
   #-----------------------------#
   Srv.ade.dth <- Surv(data$followup_time_first_year,data$ade_or_death_first_year_of_haart)
   Srv.ade.dth.europe <- Surv(data.europe$followup_time_first_year,data.europe$ade_or_death_first_year_of_haart)
   Srv.ade.dth.north_american <- Surv(data.north_american$followup_time_first_year,
                                      data.north_american$ade_or_death_first_year_of_haart)
   #-----------------------------#
   #       Time to death         #
   #     in the first year       #
   #-----------------------------#
   Srv.dth <- Surv(data$followup_time_first_year,data$death_first_year_of_haart)


   # Adjusted
   predictors.adj <- c(var.,"gender.factor","rcs(cd4_t0_corrected,4)","rcs(lrna_t0_corrected,3)",
                   "rcs(age,3)","pi_based.factor","nnrti_based.factor",
                   "rcs(haartyr,3)","stagec.factor","strat(cohort)")
   predictors2.adj <- c(var.,"gender.factor","rcs(cd4_t0_corrected,4)","rcs(lrna_t0_corrected,3)",
                   "rcs(age,3)","pi_based.factor","nnrti_based.factor",
                   "rcs(haartyr,3)","stagec.factor","strata(cohort)")

   # Any ADE
   fmla <- as.formula(paste("Srv.ade ~ ",paste(predictors.adj,collapse="+")))

   options(datadist='ddist')
   any.ade.adj <- cph(fmla, data=data)

   summ.50 <- summary(any.ade.adj, native.born.factor = "Yes",
                           gender.factor = "Male",
                           cd4_t0_corrected = c(200,50),
                           lrna_t0_corrected = c(5,4),
                           age = c(40,20),
                           pi_based.factor = "No",
                           nnrti_based.factor = "No",
                           haartyr = c(2001,1998),
                           stagec.factor = "No")
   ade.p <- anova(any.ade.adj)

   # ADE or Death
   fmla <- as.formula(paste("Srv.ade.dth ~ ",paste(predictors.adj, collapse="+")))
   fmla.europe <- as.formula(paste("Srv.ade.dth.europe ~ ",paste(predictors.adj, collapse="+")))
   fmla.north_american <- as.formula(paste("Srv.ade.dth.north_american ~ ",paste(predictors.adj, collapse="+")))

   options(datadist='ddist')    
   ade.or.dth.adj <- cph(fmla, data=data)

   summ.50.adedth <- summary(ade.or.dth.adj, native.born.factor = "Yes",
                           gender.factor = "Male",
                           cd4_t0_corrected = c(200,50),
                           lrna_t0_corrected = c(5,4),
                           age = c(40,20),
                           pi_based.factor = "No",
                           nnrti_based.factor = "No",
                           haartyr = c(2001,1998),
                           stagec.factor = "No")
   ade.dth.p <- anova(ade.or.dth.adj)

   # Death
   fmla <- as.formula(paste("Srv.dth ~ ",paste(predictors.adj, collapse="+")))

   options(datadist='ddist')    
   dth.adj <- cph(fmla, data=data)

   summ.50.dth <- summary(dth.adj, native.born.factor = "Yes",
                           gender.factor = "Male",
                           cd4_t0_corrected = c(200,50),
                           lrna_t0_corrected = c(5,4),
                           age = c(40,20),
                           pi_based.factor = "No",
                           nnrti_based.factor = "No",
                           haartyr = c(2001,1998),
                           stagec.factor = "No")
   dth.p <- anova(dth.adj)
@


\end{document}