#--------------------------------#
#        Loading libraries       #
#--------------------------------#
rm(list=ls())
options(width=180)
library(foreign)
library(Hmisc)
library(rms)

#--------------------------------#
#          Functions             #
#--------------------------------#
source('mylatex.summary.formula.reverse.R')


#--------------------------------#
#          Loading data          #
#--------------------------------#
mh_nade <- read.dta('deid_nades_prior_mh_01may2013.dta',convert.factors=FALSE)
names(mh_nade) <- tolower(names(mh_nade))

names(mh_nade) <- c('age_at_mh_nade_after_t0_date1','age_at_mh_nade_after_t0_date2',
                    'age_at_mh_nade_after_t0_date3','age_at_mh_nade_after_t0_date4',
                    'age_at_mh_nade_after_t0_date5','age_at_mh_nade_after_t0_date6',
                    'cfar_pid','mh_non_ade_after_t01','mh_non_ade_after_t02',
                    'mh_non_ade_after_t03','mh_non_ade_after_t04',
                    'mh_non_ade_after_t05','mh_non_ade_after_t06',
                    'mh_non_ade_category1','mh_non_ade_category2',
                    'mh_non_ade_category3','mh_non_ade_category4',
                    'mh_non_ade_category5','mh_non_ade_category6',
                    'prior_mh_non_ade','prior_mh_non_ade_no_sa',
                    'prior_mh_non_ade_only')
# Hard coding correction for 993837F -- oth_nade shows they had a NADE.  mh_nade shows it occurred in the type but
# not in the ages
mh_nade$age_at_mh_nade_after_t0_date1 <- ifelse(mh_nade$cfar_pid == '993837F',31.3977,
                                                mh_nade$age_at_mh_nade_after_t0_date1)
                                                
# Need to remove instances of NON-HODGKIN'S LYMPHOMA, LARGE CELL and NON-HODGKIN'S LYMPHOMA, UNSPECIFIED
which.nhl1 <- which(mh_nade$mh_non_ade_after_t01 %in% c("NON-HODGKIN'S LYMPHOMA, LARGE CELL", "NON-HODGKIN'S LYMPHOMA, UNSPECIFIED"))
which.nhl2 <- which(mh_nade$mh_non_ade_after_t02 %in% c("NON-HODGKIN'S LYMPHOMA, LARGE CELL", "NON-HODGKIN'S LYMPHOMA, UNSPECIFIED"))
which.nhl3 <- which(mh_nade$mh_non_ade_after_t03 %in% c("NON-HODGKIN'S LYMPHOMA, LARGE CELL", "NON-HODGKIN'S LYMPHOMA, UNSPECIFIED"))
which.nhl4 <- which(mh_nade$mh_non_ade_after_t04 %in% c("NON-HODGKIN'S LYMPHOMA, LARGE CELL", "NON-HODGKIN'S LYMPHOMA, UNSPECIFIED"))
which.nhl5 <- which(mh_nade$mh_non_ade_after_t05 %in% c("NON-HODGKIN'S LYMPHOMA, LARGE CELL", "NON-HODGKIN'S LYMPHOMA, UNSPECIFIED"))
which.nhl6 <- which(mh_nade$mh_non_ade_after_t06 %in% c("NON-HODGKIN'S LYMPHOMA, LARGE CELL", "NON-HODGKIN'S LYMPHOMA, UNSPECIFIED"))

# Moving those with these diagnoses
mh_nade$mh_non_ade_after_t01[which.nhl1] <- mh_nade$age_at_mh_nade_after_t0_date1[which.nhl1] <- mh_nade$mh_non_ade_category1[which.nhl1] <- NA
mh_nade$mh_non_ade_after_t01[which.nhl1] <- mh_nade$mh_non_ade_after_t02[which.nhl1]
mh_nade$mh_non_ade_after_t02[which.nhl1] <- mh_nade$mh_non_ade_after_t03[which.nhl1]
mh_nade$mh_non_ade_after_t03[which.nhl1] <- mh_nade$mh_non_ade_after_t04[which.nhl1]
mh_nade$mh_non_ade_after_t04[which.nhl1] <- mh_nade$mh_non_ade_after_t05[which.nhl1]
mh_nade$mh_non_ade_after_t05[which.nhl1] <- mh_nade$mh_non_ade_after_t06[which.nhl1]

mh_nade$age_at_mh_nade_after_t0_date1[which.nhl1] <- mh_nade$age_at_mh_nade_after_t0_date2[which.nhl1]
mh_nade$age_at_mh_nade_after_t0_date2[which.nhl1] <- mh_nade$age_at_mh_nade_after_t0_date3[which.nhl1]
mh_nade$age_at_mh_nade_after_t0_date3[which.nhl1] <- mh_nade$age_at_mh_nade_after_t0_date4[which.nhl1]
mh_nade$age_at_mh_nade_after_t0_date4[which.nhl1] <- mh_nade$age_at_mh_nade_after_t0_date5[which.nhl1]
mh_nade$age_at_mh_nade_after_t0_date5[which.nhl1] <- mh_nade$age_at_mh_nade_after_t0_date6[which.nhl1]

mh_nade$mh_non_ade_category1[which.nhl1] <- mh_nade$mh_non_ade_category2[which.nhl1]
mh_nade$mh_non_ade_category2[which.nhl1] <- mh_nade$mh_non_ade_category3[which.nhl1]
mh_nade$mh_non_ade_category3[which.nhl1] <- mh_nade$mh_non_ade_category4[which.nhl1]
mh_nade$mh_non_ade_category4[which.nhl1] <- mh_nade$mh_non_ade_category5[which.nhl1]
mh_nade$mh_non_ade_category5[which.nhl1] <- mh_nade$mh_non_ade_category6[which.nhl1]


# And for nhl2
mh_nade$age_at_mh_nade_after_t0_date2[which.nhl2] <- mh_nade$mh_non_ade_category2[which.nhl2] <- NA
mh_nade$mh_non_ade_after_t02[which.nhl2] <- mh_nade$mh_non_ade_after_t03[which.nhl2]
mh_nade$mh_non_ade_after_t03[which.nhl2] <- mh_nade$mh_non_ade_after_t04[which.nhl2]
mh_nade$mh_non_ade_after_t04[which.nhl2] <- mh_nade$mh_non_ade_after_t05[which.nhl2]
mh_nade$mh_non_ade_after_t05[which.nhl2] <- mh_nade$mh_non_ade_after_t06[which.nhl2]

mh_nade$age_at_mh_nade_after_t0_date2[which.nhl2] <- mh_nade$age_at_mh_nade_after_t0_date3[which.nhl2]
mh_nade$age_at_mh_nade_after_t0_date3[which.nhl2] <- mh_nade$age_at_mh_nade_after_t0_date4[which.nhl2]
mh_nade$age_at_mh_nade_after_t0_date4[which.nhl2] <- mh_nade$age_at_mh_nade_after_t0_date5[which.nhl2]
mh_nade$age_at_mh_nade_after_t0_date5[which.nhl2] <- mh_nade$age_at_mh_nade_after_t0_date6[which.nhl2]

mh_nade$mh_non_ade_category2[which.nhl2] <- mh_nade$mh_non_ade_category3[which.nhl2]
mh_nade$mh_non_ade_category3[which.nhl2] <- mh_nade$mh_non_ade_category4[which.nhl2]
mh_nade$mh_non_ade_category4[which.nhl2] <- mh_nade$mh_non_ade_category5[which.nhl2]
mh_nade$mh_non_ade_category5[which.nhl2] <- mh_nade$mh_non_ade_category6[which.nhl2]


oth_nade <- read.dta('deid_nades_no_mh_subst_26mar12.dta',convert.factors=FALSE)
names(oth_nade) <- tolower(names(oth_nade))

which.nhl1 <- which(oth_nade$non_ade_after_t01 %in% c("NON-HODGKIN'S LYMPHOMA, LARGE CELL", "NON-HODGKIN'S LYMPHOMA, UNSPECIFIED"))
which.nhl2 <- which(oth_nade$non_ade_after_t02 %in% c("NON-HODGKIN'S LYMPHOMA, LARGE CELL", "NON-HODGKIN'S LYMPHOMA, UNSPECIFIED"))
which.nhl3 <- which(oth_nade$non_ade_after_t03 %in% c("NON-HODGKIN'S LYMPHOMA, LARGE CELL", "NON-HODGKIN'S LYMPHOMA, UNSPECIFIED"))
which.nhl4 <- which(oth_nade$non_ade_after_t04 %in% c("NON-HODGKIN'S LYMPHOMA, LARGE CELL", "NON-HODGKIN'S LYMPHOMA, UNSPECIFIED"))
which.nhl5 <- which(oth_nade$non_ade_after_t05 %in% c("NON-HODGKIN'S LYMPHOMA, LARGE CELL", "NON-HODGKIN'S LYMPHOMA, UNSPECIFIED"))
which.nhl6 <- which(oth_nade$non_ade_after_t06 %in% c("NON-HODGKIN'S LYMPHOMA, LARGE CELL", "NON-HODGKIN'S LYMPHOMA, UNSPECIFIED"))


# Moving those with these diagnoses
oth_nade$non_ade_after_t01[which.nhl1] <- oth_nade$age_at_nade_after_t0_date1[which.nhl1] <- oth_nade$non_ade_category1[which.nhl1] <- NA
oth_nade$non_ade_after_t01[which.nhl1] <- oth_nade$non_ade_after_t02[which.nhl1]
oth_nade$non_ade_after_t02[which.nhl1] <- oth_nade$non_ade_after_t03[which.nhl1]
oth_nade$non_ade_after_t03[which.nhl1] <- oth_nade$non_ade_after_t04[which.nhl1]

oth_nade$age_at_nade_after_t0_date1[which.nhl1] <- oth_nade$age_at_nade_after_t0_date2[which.nhl1]
oth_nade$age_at_nade_after_t0_date2[which.nhl1] <- oth_nade$age_at_nade_after_t0_date3[which.nhl1]
oth_nade$age_at_nade_after_t0_date3[which.nhl1] <- oth_nade$age_at_nade_after_t0_date4[which.nhl1]

oth_nade$non_ade_category1[which.nhl1] <- oth_nade$non_ade_category2[which.nhl1]
oth_nade$non_ade_category2[which.nhl1] <- oth_nade$non_ade_category3[which.nhl1]
oth_nade$non_ade_category3[which.nhl1] <- oth_nade$non_ade_category4[which.nhl1]


# And for nhl2
oth_nade$age_at_nade_after_t0_date2[which.nhl2] <- oth_nade$non_ade_category2[which.nhl2] <- NA
oth_nade$non_ade_after_t02[which.nhl2] <- oth_nade$non_ade_after_t03[which.nhl2]
oth_nade$non_ade_after_t03[which.nhl2] <- oth_nade$non_ade_after_t04[which.nhl2]

oth_nade$age_at_nade_after_t0_date2[which.nhl2] <- oth_nade$age_at_nade_after_t0_date3[which.nhl2]
oth_nade$age_at_nade_after_t0_date3[which.nhl2] <- oth_nade$age_at_nade_after_t0_date4[which.nhl2]

oth_nade$non_ade_category2[which.nhl2] <- oth_nade$non_ade_category3[which.nhl2]
oth_nade$non_ade_category3[which.nhl2] <- oth_nade$non_ade_category4[which.nhl2]


oth_nade$non_ade_category2 <- ifelse(oth_nade$non_ade_category2 %in% c('','MENTAL HEALTH'),NA,oth_nade$non_ade_category2)
oth_nade$age_at_nade_after_t0_date2 <- ifelse(is.na(oth_nade$non_ade_category2),NA,oth_nade$age_at_nade_after_t0_date2)
oth_nade$non_ade_after_t02 <- ifelse(is.na(oth_nade$non_ade_category2),NA,oth_nade$non_ade_after_t02)

oth_nade$non_ade_category1 <- ifelse(oth_nade$non_ade_category1 == '',NA,oth_nade$non_ade_category1)
oth_nade$non_ade_category3 <- ifelse(oth_nade$non_ade_category3 == '',NA,oth_nade$non_ade_category3)
oth_nade$non_ade_category4 <- ifelse(oth_nade$non_ade_category4 == '',NA,oth_nade$non_ade_category4)


data <- read.dta('hormonal_contr_deid_07jun2012.dta',convert.factors=FALSE)
names(data) <- tolower(names(data))
# Need to reset to be 399 if below that
data$baseline_vl <- ifelse(!is.na(data$baseline_vl) & data$baseline_vl < 399, 399, data$baseline_vl)


# ------------------------------------------------------------------ #
#                  Creating long version of data frame               #
# ------------------------------------------------------------------ #
# NADEs excluding mental health NADEs
params <- grep('age_at_nade_after_t0_date', names(oth_nade), value=TRUE)
oth_nade_long <- na.omit(reshape(oth_nade, params, v.names='age_nade',timevar='nade',idvar='cfar_pid',
                                 direction='long',sep=',',drop=setdiff(names(data),c('cfar_pid',params))))
oth_nade_long$age_at_t0 <- data[match(oth_nade_long$cfar_pid,data$cfar_pid,nomatch=NA),'age_at_t0']
nade_outcome <- lapply(split(subset(oth_nade_long, select=c(cfar_pid,age_nade,age_at_t0)),oth_nade_long$cfar_pid),FUN=function(y){
                       if(any(!is.na(y[,2])) & any(y[which(!is.na(y[,2])),2] > y[which(!is.na(y[,2])),3])){
                          nade <- 1
                       } else {
                          nade <- 0
                       }
                       if(nade == 1){
                          age_at_first_nade <- min(y[,2],na.rm=TRUE)
                       } else {
                          age_at_first_nade <- NA
                       }
                       df <- data.frame('cfar_pid'=y[1,1],
                                         'nade_outcome'=nade,
                                        'age_at_first_nade'=age_at_first_nade)
                       return(df)})
nade_outcome.df <- do.call('rbind',nade_outcome)

# NADEs including mental health NADEs
params <- grep('age_at_mh_nade_after_t0_date', names(mh_nade), value=TRUE)
params2 <- grep('^mh_non_ade_after_t0',names(mh_nade),value=TRUE)
params3 <- grep('mh_non_ade_category',names(mh_nade),value=TRUE)

mh_nade_long <- na.omit(reshape(mh_nade, list(params, params2,params3), 
                                v.names=c('age_mh_nade','mh_nade','mh_nade_category'),timevar='time_mh_nade',idvar='cfar_pid',
                                 direction='long',sep=',',drop=setdiff(names(data),c('cfar_pid',params,params2))))

mh_nade_long$age_at_t0 <- data[match(mh_nade_long$cfar_pid,data$cfar_pid,nomatch=NA),'age_at_t0']
mh_nade_outcome <- lapply(split(subset(mh_nade_long, select=c(cfar_pid,age_mh_nade,age_at_t0)),mh_nade_long$cfar_pid),FUN=function(y){
                       if(any(!is.na(y[,2])) & any(y[which(!is.na(y[,2])),2] > y[which(!is.na(y[,2])),3])){
                          mh_nade <- 1
                       } else {
                          mh_nade <- 0
                       }
                       if(mh_nade == 1){
                          age_at_first_mh_nade <- min(y[,2],na.rm=TRUE)
                       } else {
                          age_at_first_mh_nade <- NA
                       }
                       df <- data.frame('cfar_pid'=y[1,1],
                                         'mh_nade_outcome'=mh_nade,
                                        'age_at_first_mh_nade'=age_at_first_mh_nade)
                       return(df)})
mh_nade_outcome.df <- do.call('rbind',mh_nade_outcome)


# Want an analysis of just the mental health NADEs
mh_nade_only <- subset(mh_nade_long, mh_nade_category == 'MENTAL HEALTH')
mh_nade_only$pid.age <- paste(mh_nade_only$cfar_pid,mh_nade_only$age,sep='.')

# Want an analysis of just the mental health NADEs
mh_nade_no_sa <- subset(mh_nade_long, mh_nade_category != 'SUBSTANCE ABUSE')
mh_nade_no_sa$pid.age <- paste(mh_nade_no_sa$cfar_pid,mh_nade_no_sa$age,sep='.')


#---------------------------#
params <- grep('age_at_',names(data),value=TRUE)
ages <- na.omit(reshape(data, params, v.names='age',timevar='visit',idvar='cfar_pid',direction='long',sep=',',
                       drop=setdiff(names(data),c('cfar_pid',params))))
which.rows <- which(duplicated(ages[,-2]))
ages <- ages[-which.rows,-which(names(ages) == 'visit')]

ages$age_at_first_nade <- nade_outcome.df[match(ages$cfar_pid,nade_outcome.df$cfar_pid,nomatch=NA),'age_at_first_nade']
ages$age_at_first_mh_nade <- mh_nade_outcome.df[match(ages$cfar_pid,mh_nade_outcome.df$cfar_pid,nomatch=NA),'age_at_first_mh_nade']
# ages$age_at_first_mh_nade_no_sa <- mh_nade_no_sa_outcome.df[match(ages$cfar_pid,mh_nade_no_sa_outcome.df$cfar_pid,nomatch=NA),'age_at_first_mh_nade']
ages$age_at_death <- data[match(ages$cfar_pid, data$cfar_pid, nomatch=NA),'age_at_death']
ages$age_at_t0 <- data[match(ages$cfar_pid, data$cfar_pid, nomatch=NA),'age_at_t0']
time_diff <- lapply(split(subset(ages, select=c(cfar_pid,age,age_at_t0)),ages$cfar_pid),
                    FUN=function(y){
                       y <- y[order(y[,'age']),]
                       diff. <- diff(y[,2])
                       df <- data.frame('cfar_pid'=y[-1,1],
                                         'age'=y[-1,2],
                                        'diff.'=diff.,
                                         'age_at_t0'=y[-1,'age_at_t0'])
                       return(df)})
time_diff.df <- do.call('rbind',time_diff)
time_diff.df$year_gap <- ifelse(time_diff.df$diff. > 1 & time_diff.df$age > time_diff.df$age_at_t0,1,0)
which.rows <- which(time_diff.df$year_gap == 1)
time_diff.df$age_after_year_gap <- NA
time_diff.df$age_after_year_gap[which.rows] <- time_diff.df$age[which.rows]
# Three are some IDs with multiple gaps of more than a year.  Want the first one so wil order by ID and age and then take the first for that ID
tmp <- subset(time_diff.df, !is.na(time_diff.df$age_after_year_gap), select=c(cfar_pid,age_after_year_gap))
tmp <- tmp[order(tmp$cfar_pid,tmp$age_after_year_gap),]
tmp <- subset(tmp, !duplicated(cfar_pid))
ages$age_after_year_gap <- tmp[match(ages$cfar_pid,tmp$cfar_pid,nomatch=NA),'age_after_year_gap']
ages <- ages[order(ages$cfar_pid,ages$age),]
sub_ages <- subset(ages, is.na(age_after_year_gap) | (age < age_after_year_gap & age >= age_at_t0))

# Finding last age of last record for subject
last_age <- lapply(split(subset(sub_ages, select=c(cfar_pid, age)),sub_ages$cfar_pid), FUN=function(y){
                   last_age <- max(y[,'age'])
                    df <- data.frame('cfar_pid'=y[1,'cfar_pid'],
                                      'last_age'=last_age)
                    return(df)})
last_age.df <- do.call('rbind',last_age)
sub_ages$last_age <- last_age.df[match(sub_ages$cfar_pid, last_age.df$cfar_pid, nomatch=NA),'last_age']


# It could be that age_at_first_... happened after year gap so need to reset those to NA if that is true
sub_ages$age_at_first_nade <- ifelse(sub_ages$last_age < sub_ages$age_at_first_nade, NA, sub_ages$age_at_first_nade)
sub_ages$age_at_first_mh_nade <- ifelse(sub_ages$last_age < sub_ages$age_at_first_mh_nade, NA, sub_ages$age_at_first_mh_nade)
# sub_ages$age_at_first_mh_nade_no_sa <- ifelse(sub_ages$last_age < sub_ages$age_at_first_mh_nade_no_sa, NA, sub_ages$age_at_first_mh_nade_no_sa)
sub_ages$age_at_death <- ifelse(sub_ages$last_age < sub_ages$age_at_death, NA, sub_ages$age_at_death)

# Determining the maxium follow-up time for any person regardless of outcome
max_age <- lapply(split(subset(sub_ages, select=c(cfar_pid,age_at_first_nade,age_at_first_mh_nade,age_at_death,last_age)),sub_ages$cfar_pid),
                        FUN=function(y){
                           max_age <- max(y[,-1],na.rm=TRUE)
                           df <- data.frame('cfar_pid'=y[1,'cfar_pid'],
                                              'max_age'=max_age)
                           return(df)})
max_age.df <- do.call('rbind',max_age)
sub_ages$max_age <- max_age.df[match(sub_ages$cfar_pid, max_age.df$cfar_pid, nomatch=NA),'max_age']
sub_ages <- sub_ages[order(sub_ages$cfar_pid,sub_ages$age),]

sub_ages$month <- with(sub_ages, (age - age_at_t0)*12+1)
sub_ages$month_rounded <- ceiling(sub_ages$month)

# Starting monthly data frame
by_month <- lapply(split(subset(sub_ages, select=c(cfar_pid, month_rounded)), sub_ages$cfar_pid),FUN=function(y){
                   followup_time <- seq(1,max(y[,'month_rounded']))
                    df <- data.frame('cfar_pid'=rep(y[1,1],times=length(followup_time)),
                                      'month'=followup_time)
                    return(df)})
time.df <- do.call('rbind',by_month)
time.df$age_at_t0 <- sub_ages[match(time.df$cfar_pid, sub_ages$cfar_pid, nomatch=NA),'age_at_t0']
time.df$age <- ifelse(time.df$month == 1, time.df$age_at_t0, NA)
time.df$age <- ifelse(time.df$month > 1, (time.df$age_at_t0 + (time.df$month-1)/12),time.df$age)

# Adding in fixed data
time.df$age_at_death <- data[match(time.df$cfar_pid,data$cfar_pid,nomatch=NA),'age_at_death']
time.df$age_at_datedelivery <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'age_at_datedelivery']
time.df$age_at_first_haart_end <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'age_at_first_haart_end']
time.df$age_at_first_haart_start <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'age_at_first_haart_start']
time.df$age_at_first_hcp_start <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'age_at_first_hcp_start']
time.df$age_at_first_hcp_end <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'age_at_first_hcp_end']
time.df$age_at_first_visit <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'age_at_first_visit']
time.df$age_at_last_visit <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'age_at_last_visit']
time.df$baseline_hemoglobin <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'baseline_hemoglobin']
time.df$baseline_cd4_percent <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'baseline_cd4_percent']
time.df$baseline_cd4_count <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'baseline_cd4_count']
time.df$baseline_vl <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'baseline_vl']
time.df$prior_hcp_t0 <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'hcp_prior_t0']
time.df$non_idu_prior_t0 <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'non_idu_prior_t0']
time.df$prior_haart <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'prior_haart']
time.df$prior_non_haart_art <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'prior_non_haart_art']
time.df$race <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'race']
time.df$race.factor <- factor(time.df$race, levels=c(0:10),
                              labels=c('Undefined','Caucasian','AA','Hispanic','Native American/Eskimo',
                                       'Asian','Pacific Islander','Indian','Middle Eastern','Other','Mixed race'))
time.df$race_combined <- ifelse(time.df$race %in% c(0,3,4,5,6,7,8,9,10),0,time.df$race)
time.df$race_combined.factor <- factor(time.df$race_combined, levels=c(0,1,2),
                                       labels=c('Other','Caucasian','AA'))
time.df$race_2level <- ifelse(time.df$race_combined == 1 & !is.na(time.df$race_combined),1,
                              ifelse(!is.na(time.df$race_combined) & time.df$race_combined %in% c(0,2),0,NA))
time.df$race_2level.factor <- factor(time.df$race_2level, levels=c(0,1), labels=c('Non-white','White'))
time.df$smoking_during_t0 <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'smoking_during_t0']
time.df$year_of_first_haart <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'year_of_first_haart']
time.df$year_of_t0 <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'year_of_t0']
time.df$sex <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'sex']
time.df$prior_mh_non_ade <- mh_nade[match(time.df$cfar_pid,mh_nade$cfar_pid,nomatch=NA),'prior_mh_non_ade']
time.df$prior_mh_non_ade_no_sa <- mh_nade[match(time.df$cfar_pid,mh_nade$cfar_pid,nomatch=NA),'prior_mh_non_ade_no_sa']
time.df$prior_mh_non_ade_only <- mh_nade[match(time.df$cfar_pid,mh_nade$cfar_pid,nomatch=NA),'prior_mh_non_ade_only']
time.df$prior_non_ade <- oth_nade[match(time.df$cfar_pid,oth_nade$cfar_pid,nomatch=NA),'prior_non_ade']
time.df$cause_of_death <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'cause_of_death']
time.df$route_of_infection <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'risk1']
time.df$route_of_infection <- ifelse(time.df$route_of_infection == '',NA,time.df$route_of_infection)
# Collapsing 'Other' which has 8 into 'Unknown'
time.df$route_of_infection <- ifelse(time.df$route_of_infection == 'Other','Unknown',time.df$route_of_infection)
time.df$route_of_infection <- factor(time.df$route_of_infection)
time.df$route_of_infection_combined <- ifelse(time.df$route_of_infection == 'IDU' & !is.na(time.df$route_of_infection),1,
                                              ifelse(time.df$route_of_infection %in% c('Hetero','Unknown'),0,NA))
time.df$route_of_infection_combined.factor <- factor(time.df$route_of_infection_combined, levels=c(0,1),
                                                     labels=c('Non-IDU','IDU'))
time.df$secondary_route_of_infection <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'risk2']
time.df$secondary_route_of_infection <- ifelse(time.df$secondary_route_of_infection == '',NA,time.df$secondary_route_of_infection)
time.df$prior_ade <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'prior_ade']
time.df$prior_hcv_t0 <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'hcv_prior_t0']

time.df$pid.month <- paste(time.df$cfar_pid, time.df$month, sep='.')

# Adding in CD4 counts to monthly data
params <- grep('cd4_count',names(data),value=TRUE)
params <- params[-1] # removing baseline_cd4_count
cd4_count <- reshape(data, params, v.names='cd4_count',timevar='lab_visit',idvar='cfar_pid',direction='long',sep=',',
                      drop=setdiff(names(data),c('cfar_pid',params)))
params <- grep('age_at_lab',names(data),value=TRUE)
age_at_lab <- reshape(data, params, v.names='age_at_lab',timevar='lab_visit',idvar='cfar_pid',direction='long',sep=',',
                       drop=setdiff(names(data), c('cfar_pid',params)))
params <- grep('hiv1_rna',names(data),value=TRUE)
hiv1_rna <- reshape(data, params, v.names='hiv1_rna',timevar='lab_visit',idvar='cfar_pid',direction='long',sep=',',
                     drop=setdiff(names(data),c('cfar_pid',params)))
labs <- merge(cd4_count,hiv1_rna,all=TRUE)
labs <- merge(labs, age_at_lab, all=TRUE)
labs$baseline_cd4_count <- data[match(labs$cfar_pid, data$cfar_pid, nomatch=NA),'baseline_cd4_count']
labs$baseline_vl <- data[match(labs$cfar_pid, data$cfar_pid, nomatch=NA),'baseline_vl']
labs <- labs[,-2]
labs$age_at_t0 <- data[match(labs$cfar_pid, data$cfar_pid, nomatch=NA),'age_at_t0']
labs$month <- with(labs, (age_at_lab - age_at_t0)*12+1)
labs$month_rounded <- ceiling(labs$month)
labs$pid.month <- paste(labs$cfar_pid, labs$month_rounded, sep='.')

# To add CD4 count to monthly data frame:
# For those PIDs who have duplicated records in a month, take the non-missing CD4 count or the first CD4 count in the month if
# all are non-missing
labs <- labs[order(labs$cfar_pid, labs$month_rounded, labs$cd4_count, na.last=TRUE),]
which.rows <- which(duplicated(labs$pid.month) & is.na(labs$cd4_count))
sub_labs <- labs[-which.rows,]
sub_labs <- sub_labs[order(sub_labs$cfar_pid, sub_labs$age_at_lab),]
sub_labs$cd4_count <- ifelse(sub_labs$month_rounded == 1 & (sub_labs$cd4_count != sub_labs$baseline_cd4_count |
                                                            (is.na(sub_labs$cd4_count) & !is.na(sub_labs$baseline_cd4_count))),
                             sub_labs$baseline_cd4_count,sub_labs$cd4_count)
time.df$cd4_count <- sub_labs[match(time.df$pid.month, sub_labs$pid.month,nomatch=NA),'cd4_count']
time.df$cd4_count <- ifelse(time.df$month == 1 & is.na(time.df$cd4_count) & !is.na(time.df$baseline_cd4_count),
                            time.df$baseline_cd4_count,time.df$cd4_count)
# Repeat for HIV1-RNA
labs <- labs[order(labs$cfar_pid, labs$month_rounded, labs$hiv1_rna, na.last=TRUE),]
which.rows <- which(duplicated(labs$pid.month) & is.na(labs$hiv1_rna))
sub_labs <- labs[-which.rows,]
sub_labs <- sub_labs[order(sub_labs$cfar_pid, sub_labs$age_at_lab),]
time.df$hiv1_rna <- sub_labs[match(time.df$pid.month, sub_labs$pid.month,nomatch=NA),'hiv1_rna']
sub_labs$hiv1_rna <- ifelse(sub_labs$month_rounded == 1 & (sub_labs$hiv1_rna != sub_labs$baseline_vl |
                                                           (is.na(sub_labs$hiv1_rna) & !is.na(sub_labs$baseline_vl))),
                            sub_labs$baseline_vl,sub_labs$hiv1_rna)

# Lower limits of detection for viral loads seem to vary so will set all to 399
time.df$hiv1_rna <- sub_labs[match(time.df$pid.month, sub_labs$pid.month, nomatch=NA),'hiv1_rna']
time.df$hiv1_rna <- ifelse(time.df$hiv1_rna < 399 & !is.na(time.df$hiv1_rna),399,time.df$hiv1_rna)
time.df$hiv1_rna <- ifelse(time.df$month == 1 & is.na(time.df$hiv1_rna) & !is.na(time.df$baseline_vl),
                            time.df$baseline_vl,time.df$hiv1_rna)


# Adding in regimen information
# First need to define those that straddle age_at_t0 and modify start date for those;
# Will also omit those whose start and end date occurred before age_at_t0
params <- grep('age_at_reg_start_',names(data),value=TRUE)
reg_start <- na.omit(reshape(data, params, timevar='regimen',idvar='cfar_pid',direction='long',sep='',
                             drop=setdiff(names(data),c('cfar_pid',params))))
reg_start$age <- reg_start$age_at_reg_start_date


params <- grep('age_at_reg_end_',names(data),value=TRUE)
reg_end <- na.omit(reshape(data, params, timevar='regimen',idvar='cfar_pid',direction='long',sep='',
                           drop=setdiff(names(data),c('cfar_pid',params))))
reg_end$age <- reg_end$age_at_reg_end_date
# reg_end$regimen <- reg_end$reg_end

reg_total <- merge(subset(reg_start, select=c(cfar_pid,regimen,age_at_reg_start_date)),
                   subset(reg_end, select=c(cfar_pid,regimen,age_at_reg_end_date)),by=c('cfar_pid','regimen'),all=TRUE)
reg_total$age_at_t0 <- data[match(reg_total$cfar_pid, data$cfar_pid, nomatch=NA),'age_at_t0']
reg_total$straddle <- ifelse(reg_total$age_at_reg_start_date < reg_total$age_at_t0 & 
                             reg_total$age_at_reg_end_date > reg_total$age_at_t0,1,0)
reg_total$omit <- ifelse(reg_total$age_at_reg_start_date < reg_total$age_at_t0 & 
                         reg_total$age_at_reg_end_date < reg_total$age_at_t0,1,0)
reg_total <- subset(reg_total, omit == 0)
reg_total$age_at_reg_start_date_modified <- ifelse(reg_total$straddle == 1,reg_total$age_at_t0,
                                                   reg_total$age_at_reg_start_date)
reg_total$pid.reg <- paste(reg_total$cfar_pid,reg_total$regimen,sep='.')

#--------------------------#
new.dat <- reshape(subset(reg_total, select=c(cfar_pid, age_at_reg_start_date_modified,age_at_reg_end_date)), timevar='regimen',idvar='pid.reg',
             varying=list(c('age_at_reg_start_date_modified','age_at_reg_end_date')),v.names='age',direction='long')
new.dat$count <- rep(c(1L,-1L),each=(nrow(new.dat)/2))
new.dat$reg_ind <- 0L

times <- time.df[,c('cfar_pid','age')]
vars <- merge(new.dat, times, by=c('cfar_pid','age'),all=TRUE, sort=TRUE)
vars$count[is.na(vars$count)] <- 0L

split(vars$reg_ind, vars$cfar_pid) <- lapply(split(vars$count, vars$cfar_pid),
                                             function(x) cumsum(x) + as.integer(x == -1L))
new.time.df <- merge(time.df, vars[,c('cfar_pid','age','reg_ind')],by=c('cfar_pid','age'),all.x=TRUE)
time.df <- new.time.df
rm(new.time.df)

#--------------------------#

# Also need reg_date_type
# For those that straddle age_at_t0, need to get age_at_reg_start_date_modified for below
reg_type <- merge(subset(reg_start, select=c(cfar_pid,regimen,age,age_at_reg_start_date)),
                  subset(reg_end, select=c(cfar_pid, regimen,age,age_at_reg_end_date)),all=TRUE)
reg_type$pid.reg <- paste(reg_type$cfar_pid, reg_type$regimen,sep='.')
reg_type$age_at_reg_start_date_modified <- reg_total[match(reg_type$pid.reg, reg_total$pid.reg, nomatch=NA),'age_at_reg_start_date_modified']

reg_type$reg_date_type <- ifelse(!is.na(reg_type$age_at_reg_start_date_modified) & is.na(reg_type$age_at_reg_end_date),'START',
                                 ifelse(is.na(reg_type$age_at_reg_start_date) & !is.na(reg_type$age_at_reg_end_date),'END',NA))
reg_type$age_at_t0 <- data[match(reg_type$cfar_pid, data$cfar_pid, nomatch=NA),'age_at_t0']
reg_type$age_after_year_gap <- ages[match(reg_type$cfar_pid,ages$cfar_pid,nomatch=NA),'age_after_year_gap']

reg_type <- subset(reg_type, age >= age_at_t0 & (is.na(age_after_year_gap) | age < age_after_year_gap))
reg_type <- reg_type[order(reg_type$cfar_pid, reg_type$age),]
reg_type$month <- with(reg_type, (age - age_at_t0)*12+1)
reg_type$month_rounded <- ceiling(reg_type$month)
reg_type$pid.month <- paste(reg_type$cfar_pid,reg_type$month_rounded, sep='.')

reg_type <- reg_type[order(reg_type$cfar_pid,reg_type$age),]

# Calculating reg_date_type_modified_full
junk <- lapply(split(subset(reg_type, select=c(reg_date_type,pid.month)),reg_type$pid.month), FUN=function(y){
               if(all(is.na(y[,1]))){
                  x <- NA
                  x.full <- NA
               } else {
                  x <- paste(unique(y[!is.na(y[,1]),1]), collapse=',')
                  x.full <- paste(y[!is.na(y[,1]),1], collapse=',')
               }
               if(is.na(x)){
                  j <- NA
               } else if(paste(unique(x,collapse=',')) == 'START,END'){
                  j <- 'BOTH'
               } else if(paste(unique(x,collapse=',')) == 'END,START'){
                  j <- 'END,START'
               } else {
                  j <- unique(y[!is.na(y[,1]),1])
               }
               df <- data.frame('pid.month'=y[1,2],
                                'reg_date_type_modified'=j,
                                'reg_date_type_modified_full'=x.full)
               return(df) })
junk.df <- do.call('rbind',junk)

reg_type$reg_date_type_modified <- junk.df[match(reg_type$pid.month,junk.df$pid.month,nomatch=NA),'reg_date_type_modified']
reg_type$reg_date_type_modified_full <- junk.df[match(reg_type$pid.month,junk.df$pid.month,nomatch=NA),'reg_date_type_modified_full']

time.df$reg_date_type_modified <- reg_type[match(time.df$pid.month,reg_type$pid.month,nomatch=NA),'reg_date_type_modified']
time.df$reg_date_type_modified_full <- reg_type[match(time.df$pid.month,reg_type$pid.month,nomatch=NA),'reg_date_type_modified_full']


# Need to change those who have 0 at reg_date_type_modified = 'END' from 0 to 1
junk <- lapply(split(subset(time.df, select=c(cfar_pid,reg_date_type_modified,reg_ind)),time.df$cfar_pid),FUN=function(y){
                which.end <- which(y[,'reg_date_type_modified'] == 'END' & !is.na(y[,'reg_date_type_modified']))
                if(length(which.end) == 1 && which.end == 1){
                     y[1,'reg_ind'] <- 1
                } else if(1 %in% which.end & length(which.end) > 1){
                     y[which.end,'reg_ind'] <- c(1,y[(which.end[-1]-1),'reg_ind'])
                } else if(length(which.end) > 0){
                   y[which.end,'reg_ind'] <- y[(which.end - 1),'reg_ind']
                }
                df <- data.frame('cfar_pid'=y[,'cfar_pid'],
                                 'reg_ind_modified'=y[,'reg_ind'])
                return(df)})
junk.df <- do.call('rbind',junk)
time.df <- cbind(time.df,junk.df$reg_ind_modified)
names(time.df)[dim(time.df)[2]] <- 'reg_count_modified'
time.df$reg_indicator <- ifelse(time.df$reg_count_modified > 0,1,0)
# A few with 'BOTH' or 'END,START' had 0 for reg_indicator and reg_count_modified so need to change that
time.df$reg_count_modified <- ifelse(time.df$reg_date_type_modified %in% c('BOTH','END,START') & 
                                     time.df$reg_count_modified == 0,1,time.df$reg_count_modified)
time.df$reg_indicator <- ifelse(time.df$reg_date_type_modified %in% c('BOTH','END,START') & 
                                time.df$reg_indicator == 0,1,time.df$reg_indicator)
straddle <- tapply(reg_total$straddle, INDEX=reg_total$cfar_pid, FUN=max)
straddle.df <- data.frame('cfar_pid'=names(straddle),
                          'straddle'=straddle)
time.df$straddle <- straddle.df[match(time.df$cfar_pid, straddle.df$cfar_pid, nomatch=NA),'straddle']
which.row <- which(!duplicated(time.df$cfar_pid) & time.df$straddle == 1)
time.df$reg_date_type_modified[which.row] <- 'START'


# #------------------------------#
params <- grep('haart[0123456789]',names(data),value=TRUE)

haart <- na.omit(reshape(data, params, v.names='haart_ind',timevar='reg',idvar='cfar_pid',direction='long',sep=',',
                       drop=setdiff(names(data),c('cfar_pid',params))))
haart <- haart[order(haart$cfar_pid,haart$reg),]
haart$pid.reg <- paste(haart$cfar_pid,haart$reg, sep='.')
# reg_total is subsetted on those that happened after age_at_t0 and before age_after_year_gap
reg_total$haart_indicator <- haart[match(reg_total$pid.reg, haart$pid.reg,nomatch=NA),'haart_ind']
sub_reg_total <- subset(reg_total, haart_indicator == 1)


haart.new.dat <- reshape(subset(sub_reg_total, select=c(cfar_pid, age_at_reg_start_date_modified,age_at_reg_end_date)),
                         timevar='regimen',idvar='pid.reg',varying=list(c('age_at_reg_start_date_modified','age_at_reg_end_date')),
                         v.names='age',direction='long')
haart.new.dat$count <- rep(c(1L,-1L),each=(nrow(haart.new.dat)/2))
haart.new.dat$haart_ind <- 0L

times <- time.df[,c('cfar_pid','age')]
haart.vars <- merge(haart.new.dat, times, by=c('cfar_pid','age'),all=TRUE, sort=TRUE)
haart.vars$count[is.na(haart.vars$count)] <- 0L

split(haart.vars$haart_ind, haart.vars$cfar_pid) <- lapply(split(haart.vars$count, haart.vars$cfar_pid),
                                             function(x) cumsum(x) + as.integer(x == -1L))
new.time.df <- merge(time.df, haart.vars[,c('cfar_pid','age','haart_ind')],by=c('cfar_pid','age'),all.x=TRUE)
time.df <- new.time.df
rm(new.time.df)
# To correct where at 'END' or 'BOTH' haart_ind is 0 and not 1
time.df$haart_ind <- ifelse(time.df$reg_date_type_modified %in% c('END','BOTH') & 
                            time.df$reg_count_modified == 1 & time.df$haart_ind == 0,1,
                            time.df$haart_ind)
@

<<echo=FALSE, results=hide>>=
ids.with.missing.baseline.cd4 <- time.df$cfar_pid[which(is.na(time.df$cd4_count) & !duplicated(time.df$cfar_pid))]
ids.with.missing.baseline.vl <- time.df$cfar_pid[which(is.na(time.df$hiv1_rna) & !duplicated(time.df$cfar_pid))]

# Need to hard code baseline CD4 for the 14 PIDs who have missing baseline CD4 counts
# PIDs with missing baseline CD4 counts:
# 993179A:  CD4 count 4.4 months after baseline (was on c-ART)
# 993201K:  delete; closest CD4 is 63.3 months after baseline
# 993344K:  CD4 count 4.4 months before baseline (not on c-ART when measured)
# 993634E:  CD4 count 9.2 months after baseline (on c-ART when measured)
# 993875G:  delete; closest CD4 is 67.4 months after baseline
# 993918D:  CD4 count 4.6 months before baseline (not on c-ART when measured; only 1 record in data)
# 994413A:  delete
# 994768B:  delete; closest CD4 is 30.0 months after baseline
# 994900I:  delete; closest CD4 is 28.2 months after baseline
# 994977K:  CD4 count 4.7 months after baseline (on and off c-ART repeatedly)
# 995212F:  delete; CD4 count 20.4 months after baseline
# 995832C:  avg CD4 count 15.2 months before baseline and CD4 count 4.7 months after baseline (not on c-ART before; on after)
# 996128E:  avg CD4 count 5.6 months before baseline and CD4 count 7.1 months after baseline (on c-ART before and after baseline)
# 996405H:  delete

ids.2.delete <- c('993201K','993875G','994413A','994768B','994900I','995212F','996405H')
ids.2.alter <- c('993179A','993344K','993634E','993918D','994977K','995832C','996128E')
time.df <- subset(time.df, cfar_pid %nin% ids.2.delete)
time.df$cfar_pid <- factor(time.df$cfar_pid)
time.df$baseline_cd4_count_b <- ifelse(time.df$cfar_pid == '993179A',280,
                                     ifelse(time.df$cfar_pid == '993344K',20,
                                            ifelse(time.df$cfar_pid == '993634E',182,
                                                   ifelse(time.df$cfar_pid == '993918D',20,
                                                          ifelse(time.df$cfar_pid == '994977K',351,
                                                                 ifelse(time.df$cfar_pid == '995832C',((78+148)/2),
                                                                        ifelse(time.df$cfar_pid == '996128E',
                                                                               ((73+50)/2),time.df$baseline_cd4_count)))))))

junk <- data.frame('cfar_pid'=ids.2.alter,
                   'new_cd4'=c(280,20,182,20,351,(78+148)/2,(73+50)/2))
which.row <- which(time.df$cfar_pid %in% ids.2.alter & time.df$month == 1)
time.df$cd4_count[which.row] <- junk$new_cd4

ids.2.alter.vl <- ids.with.missing.baseline.vl[which(ids.with.missing.baseline.vl %in% ids.2.alter)]
time.df$baseline_vl_b <- ifelse(time.df$cfar_pid == '993344K',400000,
                                ifelse(time.df$cfar_pid == '993918D',408582,
                                       ifelse(time.df$cfar_pid == '994977K',1160,
                                              ifelse(time.df$cfar_pid == '995832C',((100001+49)/2),
                                                     time.df$baseline_vl))))
junk <- data.frame('cfar_pid'=ids.2.alter.vl,
                   'new_vl'=c(400000, 408582, 1160, (100001+399)/2))
which.row <- which(time.df$cfar_pid %in% ids.2.alter.vl & time.df$month == 1)
time.df$hiv1_rna[which.row] <- junk$new_vl
@


<<echo=FALSE, results=hide>>=
# Dragging down CD4 counts and viral loads and age at which those measurements made
time.df$cd4_count_b <- unsplit(lapply(split(time.df$cd4_count, time.df$cfar_pid),
                                        function(x){L <- !is.na(x)
                                                    x[c(NA,which(L))[cumsum(L)+1]]}),
                                 time.df$cfar_pid)
time.df$age_at_cd4_count <- unsplit(lapply(split(subset(time.df, select=c(cfar_pid,cd4_count,age)),time.df$cfar_pid),
                                             function(x){L <- !is.na(x[,'cd4_count'])
                                                         x[c(NA,which(L))[cumsum(L)+1],'age']}),
                                      time.df$cfar_pid)

time.df$hiv1_rna_b <- unsplit(lapply(split(time.df$hiv1_rna, time.df$cfar_pid),
                                        function(x){L <- !is.na(x)
                                                    x[c(NA,which(L))[cumsum(L)+1]]}),
                              time.df$cfar_pid)
time.df$log10_hiv1_rna_b <- log10(time.df$hiv1_rna_b)
time.df$undetectable_vl <- ifelse(time.df$hiv1_rna_b == 399 & !is.na(time.df$hiv1_rna_b),1,
                                  ifelse(!is.na(time.df$hiv1_rna_b),0,NA))


time.df$age_at_hiv1_rna <- unsplit(lapply(split(subset(time.df, select=c(cfar_pid,hiv1_rna,age)),time.df$cfar_pid),
                                             function(x){L <- !is.na(x[,'hiv1_rna'])
                                                         x[c(NA,which(L))[cumsum(L)+1],'age']}),
                                      time.df$cfar_pid)


#------------------------------------------#
# Now need to add in hormonal contraceptive information

params <- grep('age_at_hcp_start',names(data),value=TRUE)
hcp_start <- na.omit(reshape(data, params, timevar='regimen',idvar='cfar_pid',direction='long',sep='',
                             drop=setdiff(names(data),c('cfar_pid',params))))
hcp_start$age <- hcp_start$age_at_hcp_start_date

params <- grep('hcp_title',names(data), value=TRUE)
params <- params[-which(params == 'first_hcp_title')]
data$hcp_title1 <- ifelse(data$hcp_title1 == '',NA,data$hcp_title1)
data$hcp_title2 <- ifelse(data$hcp_title2 == '',NA,data$hcp_title2)
data$hcp_title3 <- ifelse(data$hcp_title3 == '',NA,data$hcp_title3)
data$hcp_title4 <- ifelse(data$hcp_title4 == '',NA,data$hcp_title4)
data$hcp_title5 <- ifelse(data$hcp_title5 == '',NA,data$hcp_title5)

hcp_type <- na.omit(reshape(data, params, timevar='regimen',idvar='cfar_pid',direction='long',sep='',
                            drop=setdiff(names(data),c('cfar_pid',params))))
hcp_start <- merge(hcp_start, hcp_type, all=TRUE)

params <- grep('age_at_hcp_end',names(data),value=TRUE)
hcp_end <- na.omit(reshape(data, params, timevar='regimen',idvar='cfar_pid',direction='long',sep='',
                             drop=setdiff(names(data),c('cfar_pid',params))))
hcp_end$age <- hcp_end$age_at_hcp_end_date

hcp_total <- merge(subset(hcp_start, select=c(cfar_pid,regimen,age_at_hcp_start_date,hcp_title)),
                   subset(hcp_end, select=c(cfar_pid,regimen,age_at_hcp_end_date)),
                   by=c('cfar_pid','regimen'),all=TRUE)
hcp_total$age_at_t0 <- data[match(hcp_total$cfar_pid, data$cfar_pid, nomatch=NA),'age_at_t0']
hcp_total$age_after_year_gap <- ages[match(hcp_total$cfar_pid,ages$cfar_pid,nomatch=NA),'age_after_year_gap']
hcp_total$straddle <- ifelse(hcp_total$age_at_hcp_start_date < hcp_total$age_at_t0 &
                             hcp_total$age_at_hcp_end_date > hcp_total$age_at_t0,1,0)
hcp_total$end_straddle <- ifelse(hcp_total$age_at_hcp_start_date < hcp_total$age_after_year_gap & 
                                 hcp_total$age_at_hcp_end_date >= hcp_total$age_after_year_gap,1,0)
hcp_total$omit <- ifelse(hcp_total$age_at_hcp_start_date < hcp_total$age_at_t0 & 
                         hcp_total$age_at_hcp_end_date <= hcp_total$age_at_t0,1,0)
hcp_total <- subset(hcp_total, omit == 0)
hcp_total$age_at_hcp_start_date_modified <- ifelse(hcp_total$straddle == 1, hcp_total$age_at_t0,
                                                   hcp_total$age_at_hcp_start_date)
# Need to reorder regimens in hcp_total so that all start with 1 -- some don't because they had regimens
# prior to baseline
hcp_total$regimen <- NA
split(hcp_total$regimen, hcp_total$cfar_pid) <- lapply(split(subset(hcp_total, select=c(regimen,cfar_pid)),hcp_total$cfar_pid),
                                                       FUN=function(y){
                                                          1:nrow(y)})
hcp_total$pid.hcp <- paste(hcp_total$cfar_pid, hcp_total$regimen,sep='.')
hcp_total$pid.age <- paste(hcp_total$cfar_pid, hcp_total$age_at_hcp_start_date_modified,sep='.')
hcp_total$hcp_title_category <- ifelse(hcp_total$hcp_title %in% c('Depo-Provera','Provera','Medroxyprogest Ace (Contracep)'),
                                       'Depo-Provera','Other')


# Adding in depo
hcp_total_depo <- subset(hcp_total, hcp_title_category == 'Depo-Provera')
new.dat.depo <- na.omit(reshape(subset(hcp_total_depo, select=c(cfar_pid,age_at_hcp_start_date_modified,age_at_hcp_end_date,
                                                                hcp_title,hcp_title_category)),v.names='age',
                                timevar='regimen',idvar='pid.hcp',direction='long',sep=',',varying=list(c('age_at_hcp_start_date_modified',
                                  'age_at_hcp_end_date'))))
new.dat.depo$pid.age <- paste(new.dat.depo$cfar_pid,new.dat.depo$age,sep='.')
new.dat.depo$pid.regimen <- paste(new.dat.depo$cfar_pid,new.dat.depo$regimen,sep='.')
hcp_total_depo$pid.regimen <- paste(hcp_total_depo$cfar_pid,hcp_total_depo$regimen,sep='.')
new.dat.depo <- upData(new.dat.depo, rename=c('hcp_title'='hcp_title_depo',
                                              'hcp_title_category'='hcp_depo_category'))

new.dat.depo$count <- rep(c(1L,-1L),each=(nrow(new.dat.depo)/2))
new.dat.depo$hcp_ind <- 0L

times <- time.df[,c('cfar_pid','age','month')]
vars <- merge(new.dat.depo, times,by=c('cfar_pid','age'),all=TRUE,sort=TRUE)
vars$count[is.na(vars$count)] <- 0L

split(vars$hcp_ind, vars$cfar_pid) <- lapply(split(vars$count, vars$cfar_pid),
                                             function(x) cumsum(x) + as.integer(x == -1L))

vars$pid.hcp2 <- unsplit(lapply(split(vars$pid.hcp, vars$cfar_pid),
                                function(x){L <- !is.na(x)
                                            x[c(NA,which(L))[cumsum(L)+1]]}),
                         vars$cfar_pid)
vars$pid.hcp2 <- ifelse(vars$hcp_ind == 0, NA,vars$pid.hcp2)

vars$hcp_title_depo <- ifelse(vars$hcp_ind %in% c(1,2),'Depo-Provera',NA)
vars$hcp_depo_category <- ifelse(vars$hcp_ind %in% c(1,2),'Depo-Provera',NA)
vars$age_at_t0 <- time.df[match(vars$cfar_pid,time.df$cfar_pid,nomatch=NA),'age_at_t0']
vars$month2 <- round(12*(vars$age - vars$age_at_t0) + 1)

vars$pid.month <- paste(vars$cfar_pid, vars$month2,sep='.')

split(vars$hcp_ind, vars$pid.month) <- lapply(split(subset(vars,select=c(month2,hcp_ind)),vars$pid.month),FUN=function(y){
                                               max(y[,'hcp_ind'],na.rm=TRUE)})
vars$hcp_title_depo <- ifelse(vars$hcp_ind %in% c(1,2), 'Depo-Provera',NA)
vars$hcp_depo_category <- ifelse(vars$hcp_ind %in% c(1,2), 'Depo-Provera',NA)

new.time.df <- merge(time.df, vars[,c('cfar_pid','age','hcp_ind','pid.hcp2','hcp_title_depo','hcp_depo_category')],
                     by=c('cfar_pid','age'),all.x=TRUE)
new.time.df <- upData(new.time.df, rename=c('hcp_ind'='hcp_depo_ind'))
time.df <- new.time.df
rm(new.time.df)


# Oral
hcp_total_other <- subset(hcp_total, hcp_title_category == 'Other')
new.dat.other <- na.omit(reshape(subset(hcp_total_other, select=c(cfar_pid,age_at_hcp_start_date_modified,age_at_hcp_end_date,
                                                                hcp_title,hcp_title_category)),v.names='age',
                                timevar='regimen',idvar='pid.hcp',direction='long',sep=',',varying=list(c('age_at_hcp_start_date_modified',
                                  'age_at_hcp_end_date'))))
new.dat.other$pid.age <- paste(new.dat.other$cfar_pid,new.dat.other$age,sep='.')
new.dat.other$pid.regimen <- paste(new.dat.other$cfar_pid,new.dat.other$regimen,sep='.')
hcp_total_other$pid.regimen <- paste(hcp_total_other$cfar_pid,hcp_total_other$regimen,sep='.')
new.dat.other <- upData(new.dat.other, rename=c('hcp_title'='hcp_title_other',
                                              'hcp_title_category'='hcp_other_category'))

new.dat.other$count <- rep(c(1L,-1L),each=(nrow(new.dat.other)/2))
new.dat.other$hcp_ind <- 0L

times <- time.df[,c('cfar_pid','age','month')]
vars <- merge(new.dat.other, times,by=c('cfar_pid','age'),all=TRUE,sort=TRUE)
vars$count[is.na(vars$count)] <- 0L

split(vars$hcp_ind, vars$cfar_pid) <- lapply(split(vars$count, vars$cfar_pid),
                                             function(x) cumsum(x) + as.integer(x == -1L))

vars$pid.hcp2 <- unsplit(lapply(split(vars$pid.hcp, vars$cfar_pid),
                                function(x){L <- !is.na(x)
                                            x[c(NA,which(L))[cumsum(L)+1]]}),
                         vars$cfar_pid)
vars$pid.hcp2 <- ifelse(vars$hcp_ind == 0, NA,vars$pid.hcp2)


vars$hcp_other_category <- ifelse(vars$hcp_ind %in% c(1,2),'Other',NA)
vars$age_at_t0 <- time.df[match(vars$cfar_pid,time.df$cfar_pid,nomatch=NA),'age_at_t0']
vars$month2 <- round(12*(vars$age - vars$age_at_t0) + 1)

vars$pid.month <- paste(vars$cfar_pid, vars$month2,sep='.')

split(vars$hcp_ind, vars$pid.month) <- lapply(split(subset(vars,select=c(month2,hcp_ind)),vars$pid.month),FUN=function(y){
                                               max(y[,'hcp_ind'],na.rm=TRUE)})
vars$hcp_other_category <- ifelse(vars$hcp_ind %in% c(1,2), 'Other',NA)

vars$hcp_title_other <- unsplit(lapply(split(subset(vars,select=c(hcp_ind,hcp_title_other)), vars$cfar_pid),
                                       function(x){L <- !is.na(x[,'hcp_title_other']) & x[,'hcp_ind'] %in% c(1,2)
                                                   x[c(NA,which(L))[cumsum(L)+1],'hcp_title_other']}),
                                vars$cfar_pid)
# The above code drags it down to all remaining records.  Don't want it on records with hcp_ind == 0 so am fixing it in 
# next line of code
vars$hcp_title_other <- ifelse(vars$hcp_ind == 0,NA,vars$hcp_title_other)
vars$hcp_ind <- ifelse(vars$hcp_ind %in% c(1,2),1,0)


vars$hcp_title_other_test <- NA
split(vars$hcp_title_other_test, vars$pid.month) <- lapply(split(subset(vars,select=c(pid.month,hcp_title_other)), 
                                                                 vars$pid.month),FUN=function(y){
                                                       if(all(is.na(y[,'hcp_title_other']))){
                                                          rep(NA,times=length(y[,'pid.month']))
                                                       } else if(length(y[which(!is.na(y[,'hcp_title_other'])),'hcp_title_other']) > 1){
                                                          # taking second unique if more than one in a month since that is 
                                                          # new regimen started, not stopped
                                                          tmp <- unique(y[which(!is.na(y[,'hcp_title_other'])),'hcp_title_other'])
                                                          if(length(tmp) > 1){
                                                             rep(unique(y[which(!is.na(y[,'hcp_title_other'])),'hcp_title_other'])[length(tmp)],
                                                                 times=length(y[,'pid.month']))
                                                          } else {
                                                             rep(unique(y[which(!is.na(y[,'hcp_title_other'])),'hcp_title_other']),
                                                                 times=length(y[,'pid.month']))
                                                          }
                                                       } else {
                                                          rep(unique(y[which(!is.na(y[,'hcp_title_other'])),'hcp_title_other']),
                                                              times=length(y[,'pid.month']))
                                                       } })
vars$hcp_title_other <- vars$hcp_title_other_test
vars$hcp_other_category <- ifelse(vars$hcp_ind %in% c(1,2),'Other',NA)

new.time.df <- merge(time.df, vars[,c('cfar_pid','age','hcp_ind','pid.hcp2','hcp_title_other','hcp_other_category')],
                     by=c('cfar_pid','age'),all.x=TRUE)
new.time.df <- upData(new.time.df, rename=c('hcp_ind'='hcp_other_ind'))
time.df <- new.time.df
rm(new.time.df)


# Need to keep this to have a general hcp_ind for overall cumulative hcp use 
new.dat <- reshape(subset(hcp_total, select=c(cfar_pid,age_at_hcp_start_date_modified,age_at_hcp_end_date)),
                   timevar='regimen',idvar='pid.hcp',
                   varying=list(c('age_at_hcp_start_date_modified','age_at_hcp_end_date')),
                   v.names='age',direction='long')
new.dat$pid.age <- paste(new.dat$cfar_pid,new.dat$age,sep='.')

new.dat$pid.regimen <- paste(new.dat$cfar_pid,new.dat$regimen,sep='.')
hcp_total$pid.regimen <- paste(hcp_total$cfar_pid,hcp_total$regimen,sep='.')

new.dat$count <- rep(c(1L,-1L),each=(nrow(new.dat)/2))
new.dat$hcp_ind <- 0L

times <- time.df[,c('cfar_pid','age')]
vars <- merge(new.dat, times,by=c('cfar_pid','age'),all=TRUE,sort=TRUE)
vars$count[is.na(vars$count)] <- 0L

vars$age_at_t0 <- time.df[match(vars$cfar_pid,time.df$cfar_pid,nomatch=NA),'age_at_t0']
vars$month2 <- round(12*(vars$age - vars$age_at_t0) + 1)

vars$pid.month <- paste(vars$cfar_pid, vars$month2,sep='.')

split(vars$hcp_ind, vars$cfar_pid) <- lapply(split(vars$count, vars$cfar_pid),
                                             function(x) cumsum(x) + as.integer(x == -1L))

split(vars$hcp_ind, vars$pid.month) <- lapply(split(subset(vars,select=c(month2,hcp_ind)),vars$pid.month),FUN=function(y){
                                              max(y[,'hcp_ind'],na.rm=TRUE) })
vars$hcp_ind <- ifelse(vars$hcp_ind %in% c(1,2),1,0)

vars$pid.hcp2 <- unsplit(lapply(split(vars$pid.hcp, vars$cfar_pid),
                                function(x){L <- !is.na(x)
                                            x[c(NA,which(L))[cumsum(L)+1]]}),
                         vars$cfar_pid)
vars$pid.hcp2 <- ifelse(vars$hcp_ind == 0, NA,vars$pid.hcp2)


new.time.df <- merge(time.df, vars[,c('cfar_pid','age','hcp_ind','pid.hcp2')],
                     by=c('cfar_pid','age'),all.x=TRUE)
time.df <- new.time.df
rm(new.time.df)


# Also need hcp_date_type
hcp_type <- merge(subset(hcp_start, select=c(cfar_pid, regimen, age, age_at_hcp_start_date)),
                  subset(hcp_end, select=c(cfar_pid, regimen, age, age_at_hcp_end_date)),all=TRUE)
hcp_type$pid.hcp <- paste(hcp_type$cfar_pid,hcp_type$regimen,sep='.')
hcp_type$age_at_hcp_start_date_modified <- hcp_total[match(hcp_type$pid.hcp, hcp_total$pid.hcp,nomatch=NA),'age_at_hcp_start_date_modified']

hcp_type$age <- ifelse(!is.na(hcp_type$age_at_hcp_start_date) & 
                       hcp_type$age_at_hcp_start_date_modified != hcp_type$age_at_hcp_start_date,hcp_type$age_at_hcp_start_date_modified,
                       hcp_type$age)
hcp_type$hcp_date_type <- ifelse(!is.na(hcp_type$age_at_hcp_start_date_modified) & 
                                 is.na(hcp_type$age_at_hcp_end_date),'START',
                                 ifelse(is.na(hcp_type$age_at_hcp_start_date) & 
                                 !is.na(hcp_type$age_at_hcp_end_date),'END',NA))
hcp_type$age_at_t0 <- data[match(hcp_type$cfar_pid, data$cfar_pid, nomatch=NA),'age_at_t0']
hcp_type$age_after_year_gap <- ages[match(hcp_type$cfar_pid,ages$cfar_pid,nomatch=NA),'age_after_year_gap']

hcp_type <- subset(hcp_type, age_at_hcp_start_date_modified >= age_at_t0 & (is.na(age_after_year_gap) | 
                                                                            age_at_hcp_start_date_modified < age_after_year_gap))
hcp_type <- hcp_type[order(hcp_type$cfar_pid, hcp_type$age),]
hcp_type$month <- with(hcp_type, (age - age_at_t0)*12+1)
hcp_type$month_rounded <- round(hcp_type$month)
hcp_type$pid.month <- paste(hcp_type$cfar_pid, hcp_type$month_rounded, sep='.')


hcp_type <- hcp_type[order(hcp_type$cfar_pid, hcp_type$age),]
junk <- tapply(hcp_type$hcp_date_type, INDEX=hcp_type$pid.month, FUN=function(y){
               if(all(is.na(y))){
                  x <- NA
               } else {
                  x <- paste(unique(y[!is.na(y)]),collapse=',')
               }
               if(is.na(x)){
                  j <- NA
               } else if(paste(unique(x,collapse=',')) == 'START,END'){
                  j <- 'BOTH'
               } else if(paste(unique(x,collapse=',')) == 'END,START'){
                  j <- 'END,START'
               } else {
                  j <- unique(y[!is.na(y)])
               }})
junk.df <- data.frame('pid.month'=names(junk),
                      'hcp_date_type'=junk)
hcp_type$hcp_date_type_modified <- as.vector(junk.df[match(hcp_type$pid.month,junk.df$pid.month,nomatch=NA),'hcp_date_type'])
time.df$hcp_date_type_modified <- hcp_type[match(time.df$pid.month,hcp_type$pid.month,nomatch=NA),'hcp_date_type_modified']

#--------------------------------#
junk <- lapply(split(subset(time.df, select=c(cfar_pid,hcp_date_type_modified,hcp_ind)),time.df$cfar_pid),FUN=function(y){
              which.end <- which(y[,'hcp_date_type_modified'] == 'END')
              if(length(which.end) > 0){
                 y[which.end,'hcp_ind'] <- y[(which.end - 1),'hcp_ind']
              }
              df <- data.frame('cfar_pid'=y[,'cfar_pid'],
                               'hcp_ind_modified'=y[,'hcp_ind'])
              return(df)})
junk.df <- do.call('rbind',junk)
time.df <- cbind(time.df, junk.df$hcp_ind_modified)
names(time.df)[dim(time.df)[2]] <- 'hcp_count_modified'


#---------------#
oth_nade$age_at_t0 <- data[match(oth_nade$cfar_pid, data$cfar_pid, nomatch=NA),'age_at_t0']
oth_nade$age_after_year_gap <- ages[match(oth_nade$cfar_pid, ages$cfar_pid, nomatch=NA),'age_after_year_gap']
mh_nade$age_at_t0 <- data[match(mh_nade$cfar_pid, data$cfar_pid, nomatch=NA),'age_at_t0']
mh_nade$age_after_year_gap <- ages[match(mh_nade$cfar_pid, ages$cfar_pid, nomatch=NA),'age_after_year_gap']

mh_nade_only$age_at_t0 <- data[match(mh_nade_only$cfar_pid, data$cfar_pid, nomatch=NA),'age_at_t0']
mh_nade_only$age_after_year_gap <- data[match(mh_nade_only$cfar_pid, data$cfar_pid, nomatch=NA),'age_after_year_gap']

mh_nade_no_sa$age_at_t0 <- data[match(mh_nade_no_sa$cfar_pid, data$cfar_pid, nomatch=NA),'age_at_t0']
mh_nade_no_sa$age_after_year_gap <- data[match(mh_nade_no_sa$cfar_pid, data$cfar_pid, nomatch=NA),'age_after_year_gap']

# Finding first NADE that is after age_at_t0 and before age_after_year_gap
params <- grep('age_at_nade_after_t0_date',names(oth_nade),value=TRUE)
sub_oth_nade <- subset(oth_nade, !is.na(age_at_nade_after_t0_date1))
first_nade <- apply(subset(sub_oth_nade, select=c(params, 'age_at_t0','age_after_year_gap','cfar_pid')), 1, FUN=function(y){
                    age_at_t0 <- as.numeric(y['age_at_t0'])
                    age_after_year_gap <- as.numeric(y['age_after_year_gap'])
                    which.age <- min(which(y[params] >= age_at_t0 & (y[params] < age_after_year_gap | 
                                                                     is.na(age_after_year_gap))))
                    if(which.age == Inf){
                       age_at_first_nade <- NA
                    } else {
                       age_at_first_nade <- as.numeric(y[which.age])
                    }
                    if(!is.na(age_at_first_nade)){
                       nade <- 1
                    } else {
                       nade <- 0
                    }
                    df <- data.frame('cfar_pid'=y['cfar_pid'],
                                     'age_of_first_nade'=age_at_first_nade,
                                     'nade'=nade)
                    return(df)})
first_nade.df <- do.call('rbind',first_nade)
first_nade.df <- subset(first_nade.df, !is.na(age_of_first_nade))

any_nade <- apply(subset(sub_oth_nade, select=c(params, 'age_at_t0','age_after_year_gap','cfar_pid')),1,FUN=function(y){
                     age_at_t0 <- as.numeric(y['age_at_t0'])
                     age_after_year_gap <- as.numeric(y['age_after_year_gap'])
                     which.age <- which(y[params] >= age_at_t0 & (y[params] < age_after_year_gap |
                                                               is.na(age_after_year_gap)))
                     if(length(which.age) == 0){
                        any_nade.df <- data.frame('cfar_pid'=y['cfar_pid'],
                                                     'age_of_nade'=NA,
                                                     'month_of_nade'=NA)
                     } else {
                        month.age <- as.numeric(ceiling(as.numeric(y[which.age]) - age_at_t0)*12+1)
                        any_nade.df <- data.frame('cfar_pid'=rep(y['cfar_pid'],length(which.age)),
                                                  'age_of_nade'=as.numeric(y[which.age]),
                                                  'month_of_nade'=month.age)
                     }
                     return(any_nade.df)
})
any_nade.df <- do.call('rbind',any_nade)
any_nade.df <- subset(any_nade.df, !is.na(age_of_nade))
any_nade.df$pid.month <- paste(any_nade.df$cfar_pid, any_nade.df$month_of_nade, sep='.')
time.df$month_of_nade <- any_nade.df[match(time.df$pid.month, any_nade.df$pid.month, nomatch=NA),'month_of_nade']


params <- grep('age_at_mh_nade_after_t0_date',names(mh_nade),value=TRUE)
sub_mh_nade <- subset(mh_nade, !is.na(age_at_mh_nade_after_t0_date1))
first_mh_nade <- apply(subset(sub_mh_nade, select=c(params, 'age_at_t0','age_after_year_gap','cfar_pid')), 1, FUN=function(y){
                    age_at_t0 <- as.numeric(y['age_at_t0'])
                    age_after_year_gap <- as.numeric(y['age_after_year_gap'])
                    which.age <- min(which(y[params] >= age_at_t0 & (y[params] < age_after_year_gap | 
                                                                     is.na(age_after_year_gap))))
                    if(which.age == Inf){
                       age_at_first_mh_nade <- NA
                    } else {
                       age_at_first_mh_nade <- as.numeric(y[which.age])
                    }
                    if(!is.na(age_at_first_mh_nade)){
                       mh_nade <- 1
                    } else {
                       mh_nade <- 0
                    }
                    df <- data.frame('cfar_pid'=y['cfar_pid'],
                                     'age_of_first_mh_nade'=age_at_first_mh_nade,
                                     'mh_nade'=mh_nade)
                    return(df)})
first_mh_nade.df <- do.call('rbind',first_mh_nade)
first_mh_nade.df <- subset(first_mh_nade.df, !is.na(age_of_first_mh_nade))


# mental health only NADEs
params <- grep('age_mh_nade',names(mh_nade_only),value=TRUE)
mh_nade_only$age_after_year_gap <- mh_nade[match(mh_nade_only$cfar_pid,mh_nade$cfar_pid,nomatch=NA),'age_after_year_gap']
mh_nade_only$age_at_t0 <- mh_nade[match(mh_nade_only$cfar_pid,mh_nade$cfar_pid,nomatch=NA),'age_at_t0']
mh_nade_only <- mh_nade_only[order(mh_nade_only$cfar_pid,mh_nade_only$age_mh_nade),]
first_mh_nade_only.df <- mh_nade_only[!duplicated(mh_nade_only$cfar_pid),c('cfar_pid','age_mh_nade','mh_nade','age_at_t0','age_after_year_gap')]
first_mh_nade_only.df <- subset(first_mh_nade_only.df, age_mh_nade >= age_at_t0 & (is.na(age_after_year_gap) | age_mh_nade < age_after_year_gap),select=c('cfar_pid','age_mh_nade','mh_nade'))
names(first_mh_nade_only.df) <- c('cfar_pid','age_of_first_mh_nade_only','mh_nade_only')

# mental health NADEs excluding SUBSTANCE ABUSE
params <- grep('age_mh_nade',names(mh_nade_no_sa),value=TRUE)
mh_nade_no_sa$age_after_year_gap <- mh_nade[match(mh_nade_no_sa$cfar_pid,mh_nade$cfar_pid,nomatch=NA),'age_after_year_gap']
mh_nade_no_sa$age_at_t0 <- mh_nade[match(mh_nade_no_sa$cfar_pid,mh_nade$cfar_pid,nomatch=NA),'age_at_t0']
mh_nade_no_sa <- mh_nade_no_sa[order(mh_nade_no_sa$cfar_pid,mh_nade_no_sa$age_mh_nade),]
first_mh_nade_no_sa.df <- mh_nade_no_sa[!duplicated(mh_nade_no_sa$cfar_pid),c('cfar_pid','age_mh_nade','mh_nade','age_at_t0','age_after_year_gap')]
first_mh_nade_no_sa.df <- subset(first_mh_nade_no_sa.df, age_mh_nade >= age_at_t0 & (is.na(age_after_year_gap) | age_mh_nade < age_after_year_gap),select=c('cfar_pid','age_mh_nade','mh_nade'))
names(first_mh_nade_no_sa.df) <- c('cfar_pid','age_of_first_mh_nade_no_sa','mh_nade_no_sa')

# Also need to get all mental health NADEs to create indicator if they have a mental health NADE prior to having another type of 
# NADE
params <- grep('age_at_mh_nade_after_t0_date',names(mh_nade),value=TRUE)
sub_mh_nade <- subset(mh_nade, !is.na(age_at_mh_nade_after_t0_date1))
any_mh_nade <- apply(subset(sub_mh_nade, select=c(params, 'age_at_t0','age_after_year_gap','cfar_pid')),1,FUN=function(y){
                     age_at_t0 <- as.numeric(y[7])
                     age_after_year_gap <- as.numeric(y[8])
                     which.age <- which(y[1:6] >= age_at_t0 & (y[1:6] < age_after_year_gap |
                                                               is.na(age_after_year_gap)))
                     if(length(which.age) == 0){
                        any_mh_nade.df <- data.frame('cfar_pid'=y[9],
                                                     'age_of_mh_nade'=NA,
                                                     'month_of_mh_nade'=NA)
                     } else {
                        month.age <- as.numeric(ceiling(as.numeric(y[which.age]) - age_at_t0)*12+1)
                        any_mh_nade.df <- data.frame('cfar_pid'=rep(y[9],length(which.age)),
                                                     'age_of_mh_nade'=as.numeric(y[which.age]),
                                                     'month_of_mh_nade'=month.age)
                     }
                     return(any_mh_nade.df)
})
any_mh_nade.df <- do.call('rbind',any_mh_nade)
any_mh_nade.df <- subset(any_mh_nade.df, !is.na(age_of_mh_nade))
any_mh_nade.df$pid.month <- paste(any_mh_nade.df$cfar_pid, any_mh_nade.df$month_of_mh_nade, sep='.')
time.df$month_of_mh_nade <- any_mh_nade.df[match(time.df$pid.month, any_mh_nade.df$pid.month, nomatch=NA),'month_of_mh_nade']

time.df$mh_nade_before_indicator <- ifelse(!is.na(time.df$month_of_mh_nade) & is.na(time.df$month_of_nade),1,0)


time.df$age_of_first_nade <- first_nade.df[match(time.df$cfar_pid, first_nade.df$cfar_pid, nomatch=NA),'age_of_first_nade']
time.df$nade <- first_nade.df[match(time.df$cfar_pid, first_nade.df$cfar_pid,nomatch=NA),'nade']
time.df$nade[is.na(time.df$nade)] <- 0
time.df$age_of_first_mh_nade <- first_mh_nade.df[match(time.df$cfar_pid, first_mh_nade.df$cfar_pid, nomatch=NA),'age_of_first_mh_nade']
time.df$mh_nade <- first_mh_nade.df[match(time.df$cfar_pid, first_mh_nade.df$cfar_pid,nomatch=NA),'mh_nade']
time.df$mh_nade[is.na(time.df$mh_nade)] <- 0


time.df$age_of_first_mh_nade_only <- first_mh_nade_only.df[match(time.df$cfar_pid, first_mh_nade_only.df$cfar_pid,
                                                                 nomatch=NA),'age_of_first_mh_nade_only']
time.df$mh_nade_only <- first_mh_nade_only.df[match(time.df$cfar_pid, first_mh_nade_only.df$cfar_pid,nomatch=NA),'mh_nade_only']
time.df$mh_nade_only[is.na(time.df$mh_nade_only)] <- 0

time.df$age_of_first_mh_nade_no_sa <- first_mh_nade_no_sa.df[match(time.df$cfar_pid, first_mh_nade_no_sa.df$cfar_pid,
                                                                 nomatch=NA),'age_of_first_mh_nade_no_sa']
time.df$mh_nade_no_sa <- first_mh_nade_no_sa.df[match(time.df$cfar_pid, first_mh_nade_no_sa.df$cfar_pid,nomatch=NA),'mh_nade_no_sa']
time.df$mh_nade_no_sa[is.na(time.df$mh_nade_no_sa)] <- 0


# Need to add in indicator for pregnancy.  Only have age of delivery so will have to go back 9 months to get age at pregnancy start
time.df$age_at_subsequent_deliv_1 <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'age_at_subsequent_deliv_1']
time.df$month_at_subsequent_deliv_1 <- ceiling((time.df$age_at_subsequent_deliv_1 - time.df$age_at_t0)*12+1)

time.df$age_at_subsequent_deliv_2 <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'age_at_subsequent_deliv_2']
time.df$month_at_subsequent_deliv_2 <- ceiling((time.df$age_at_subsequent_deliv_2 - time.df$age_at_t0)*12+1)

time.df$age_at_subsequent_deliv_3 <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'age_at_subsequent_deliv_3']
time.df$month_at_subsequent_deliv_3 <- ceiling((time.df$age_at_subsequent_deliv_3 - time.df$age_at_t0)*12+1)

# Estimated date of conception (edc)
time.df$age_at_subsequent_edc_1 <- data[match(time.df$cfar_pid, data$cfar_pid,nomatch=NA),'age_at_subsequent_edc_1']
time.df$month_at_subsequent_edc_1 <- ceiling((time.df$age_at_subsequent_edc_1 - time.df$age_at_t0)*12+1)

time.df$age_at_subsequent_edc_2 <- data[match(time.df$cfar_pid, data$cfar_pid,nomatch=NA),'age_at_subsequent_edc_2']
time.df$month_at_subsequent_edc_2 <- ceiling((time.df$age_at_subsequent_edc_2 - time.df$age_at_t0)*12+1)

time.df$age_at_subsequent_edc_3 <- data[match(time.df$cfar_pid, data$cfar_pid,nomatch=NA),'age_at_subsequent_edc_3']
time.df$month_at_subsequent_edc_3 <- ceiling((time.df$age_at_subsequent_edc_3 - time.df$age_at_t0)*12+1)


# Pregnancy indicator
time.df$pregnancy_indicator <- ifelse(!is.na(time.df$month_at_subsequent_deliv_1) & 
                                      time.df$month >= time.df$month_at_subsequent_edc_1 & 
                                      time.df$month < time.df$month_at_subsequent_deliv_1,1,
                                      ifelse(!is.na(time.df$month_at_subsequent_deliv_2) & 
                                      time.df$month >= time.df$month_at_subsequent_edc_2 & 
                                      time.df$month < time.df$month_at_subsequent_deliv_2,1,
                                      ifelse(!is.na(time.df$month_at_subsequent_deliv_3) & 
                                      time.df$month >= time.df$month_at_subsequent_edc_3 & 
                                      time.df$month < time.df$month_at_subsequent_deliv_3,1,0)))
time.df <- time.df[order(time.df$cfar_pid, time.df$age),]


# Now need to come up with end of follow-up for each of the different outcomes
# NADE (excluding mental health ones)
# Need to add in what the last age is for every person and then if their age_at_death is greater than that, make it missing
time.df$last_age <- NA
split(time.df$last_age, time.df$cfar_pid) <- lapply(split(time.df$age, time.df$cfar_pid),FUN=function(y){
                                                    max(y)})
time.df$age_at_death <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'age_at_death']
time.df$age_at_death <- ifelse(!is.na(time.df$age_at_death) & time.df$age_at_death > time.df$last_age,NA,
                               time.df$age_at_death)

time.df$month_at_death <- ceiling((time.df$age_at_death - time.df$age_at_t0)*12)
time.df <- subset(time.df, is.na(age_at_death) | age <= age_at_death)
time.df$last_age <- NA
split(time.df$last_age, time.df$cfar_pid) <- lapply(split(time.df$age, time.df$cfar_pid),FUN=function(y){
                                                    max(y)})

# For NADE outcome (excluding mental health)
time.df$max_age_for_nade_outcome <- NA
split(time.df$max_age_for_nade_outcome, time.df$cfar_pid) <- lapply(split(subset(time.df, select=c(last_age,age_of_first_nade)),
                                                                                 time.df$cfar_pid),FUN=min, na.rm=TRUE)
time.df$max_month_for_nade_outcome <- ceiling((time.df$max_age_for_nade_outcome - time.df$age_at_t0)*12)

time.df$nade_outcome_indicator <- ifelse((time.df$month == time.df$max_month_for_nade_outcome | 
                                         time.df$age == time.df$max_age_for_nade_outcome) & 
                                         (!is.na(time.df$age_of_first_nade) & round(time.df$age_of_first_nade,5) <= round(time.df$last_age,5)),1,0)

# For NADE outcome (including mental health)
time.df$max_age_for_mh_nade_outcome <- NA
split(time.df$max_age_for_mh_nade_outcome, time.df$cfar_pid) <- lapply(split(subset(time.df,
                                                                                    select=c(last_age,age_of_first_mh_nade)),
                                                                                 time.df$cfar_pid),FUN=min, na.rm=TRUE)
time.df$max_month_for_mh_nade_outcome <- ceiling((time.df$max_age_for_mh_nade_outcome - time.df$age_at_t0)*12)

time.df$mh_nade_outcome_indicator <- ifelse((time.df$month == time.df$max_month_for_mh_nade_outcome | 
                                            time.df$age == time.df$max_age_for_mh_nade_outcome) & 
                                            (!is.na(time.df$age_of_first_mh_nade) & time.df$age_of_first_mh_nade <= time.df$last_age),1,0)


# For NADE outcome (ONLY mental health)
time.df$max_age_for_mh_nade_only_outcome <- NA
split(time.df$max_age_for_mh_nade_only_outcome, time.df$cfar_pid) <- lapply(split(subset(time.df,
                                                                                    select=c(last_age,age_of_first_mh_nade_only)),
                                                                                 time.df$cfar_pid),FUN=min, na.rm=TRUE)
time.df$max_month_for_mh_nade_only_outcome <- ceiling((time.df$max_age_for_mh_nade_only_outcome - time.df$age_at_t0)*12)

time.df$mh_nade_only_outcome_indicator <- ifelse((time.df$month == time.df$max_month_for_mh_nade_only_outcome | 
                                            time.df$age == time.df$max_age_for_mh_nade_only_outcome) & 
                                            (!is.na(time.df$age_of_first_mh_nade_only) & time.df$age_of_first_mh_nade_only <= time.df$last_age),1,0)

# For NADE outcome (excluding SUBSTANCE ABUSE)
time.df$max_age_for_mh_nade_no_sa_outcome <- NA
split(time.df$max_age_for_mh_nade_no_sa_outcome, time.df$cfar_pid) <- lapply(split(subset(time.df,
                                                                                    select=c(last_age,age_of_first_mh_nade_no_sa)),
                                                                                 time.df$cfar_pid),FUN=min, na.rm=TRUE)
time.df$max_month_for_mh_nade_no_sa_outcome <- ceiling((time.df$max_age_for_mh_nade_no_sa_outcome - time.df$age_at_t0)*12)

time.df$mh_nade_no_sa_outcome_indicator <- ifelse((time.df$month == time.df$max_month_for_mh_nade_no_sa_outcome | 
                                            time.df$age == time.df$max_age_for_mh_nade_no_sa_outcome) & 
                                            (!is.na(time.df$age_of_first_mh_nade_no_sa) & time.df$age_of_first_mh_nade_no_sa <= time.df$last_age),1,0)

# For death
# For NADE outcome (excluding mental health)
time.df$max_age_for_death_outcome <- NA
split(time.df$max_age_for_death_outcome, time.df$cfar_pid) <- lapply(split(subset(time.df,
                                                                                  select=c(last_age,age_at_death)),
                                                                                 time.df$cfar_pid),FUN=min, na.rm=TRUE)
time.df$max_month_for_death_outcome <- ceiling((time.df$max_age_for_death_outcome - time.df$age_at_t0)*12)


time.df$death_outcome_indicator <- ifelse((time.df$month == time.df$max_month_for_death_outcome | 
                                           time.df$age == time.df$max_age_for_death) & 
                                           !is.na(time.df$age_at_death),1,0)

# Combined NADE/Death outcome
time.df$max_age_for_nade_dth_outcome <- NA
split(time.df$max_age_for_nade_dth_outcome, time.df$cfar_pid) <- lapply(split(subset(time.df, select=c(last_age, 
                                                                                                       age_of_first_nade,age_at_death)),
                                                                                 time.df$cfar_pid),FUN=min, na.rm=TRUE)
time.df$max_month_for_nade_dth_outcome <- ceiling((time.df$max_age_for_nade_dth_outcome - time.df$age_at_t0)*12)

time.df$nade_dth_outcome_indicator <- ifelse((time.df$month == time.df$max_month_for_nade_dth_outcome | 
                                             time.df$age == time.df$max_age_for_nade_dth_outcome) &
                                             ((!is.na(time.df$age_of_first_nade) & time.df$age_of_first_nade <= time.df$last_age) | 
                                             !is.na(time.df$age_at_death)),1,0)

# Combined mental health NADE/Death outcome
time.df$max_age_for_mh_nade_dth_outcome <- NA
split(time.df$max_age_for_mh_nade_dth_outcome, time.df$cfar_pid) <- lapply(split(subset(time.df, select=c(last_age, 
                                                                                                       age_of_first_mh_nade,age_at_death)),
                                                                                 time.df$cfar_pid),FUN=min, na.rm=TRUE)
time.df$max_month_for_mh_nade_dth_outcome <- ceiling((time.df$max_age_for_mh_nade_dth_outcome - time.df$age_at_t0)*12)

time.df$mh_nade_dth_outcome_indicator <- ifelse((time.df$month == time.df$max_month_for_mh_nade_dth_outcome |
                                                time.df$age == time.df$max_age_for_mh_nade_dth_outcome) & 
                                                ((!is.na(time.df$age_of_first_mh_nade) & time.df$age_of_first_mh_nade <= time.df$last_age) | 
                                                !is.na(time.df$age_at_death)),1,0)


# Combined mental health only NADE/Death outcome
time.df$max_age_for_mh_nade_only_dth_outcome <- NA
split(time.df$max_age_for_mh_nade_only_dth_outcome, time.df$cfar_pid) <- lapply(split(subset(time.df, select=c(last_age, 
                                                                                                       age_of_first_mh_nade_only,age_at_death)),
                                                                                 time.df$cfar_pid),FUN=min, na.rm=TRUE)
time.df$max_month_for_mh_nade_only_dth_outcome <- ceiling((time.df$max_age_for_mh_nade_only_dth_outcome - time.df$age_at_t0)*12)

time.df$mh_nade_only_dth_outcome_indicator <- ifelse((time.df$month == time.df$max_month_for_mh_nade_only_dth_outcome |
                                                time.df$age == time.df$max_age_for_mh_nade_only_dth_outcome) & 
                                                ((!is.na(time.df$age_of_first_mh_nade_only) & time.df$age_of_first_mh_nade_only <= time.df$last_age) | 
                                                !is.na(time.df$age_at_death)),1,0)

# Combined mental health NADE (excluding SUBSTANCE ABUSE)/Death outcome
time.df$max_age_for_mh_nade_no_sa_dth_outcome <- NA
split(time.df$max_age_for_mh_nade_no_sa_dth_outcome, time.df$cfar_pid) <- lapply(split(subset(time.df, select=c(last_age, 
                                                                                                       age_of_first_mh_nade_no_sa,age_at_death)),
                                                                                 time.df$cfar_pid),FUN=min, na.rm=TRUE)
time.df$max_month_for_mh_nade_no_sa_dth_outcome <- ceiling((time.df$max_age_for_mh_nade_no_sa_dth_outcome - time.df$age_at_t0)*12)

time.df$mh_nade_no_sa_dth_outcome_indicator <- ifelse((time.df$month == time.df$max_month_for_mh_nade_no_sa_dth_outcome |
                                                time.df$age == time.df$max_age_for_mh_nade_no_sa_dth_outcome) & 
                                                ((!is.na(time.df$age_of_first_mh_nade_no_sa) & time.df$age_of_first_mh_nade_no_sa <= time.df$last_age) | 
                                                !is.na(time.df$age_at_death)),1,0)

# Adding in ADE information
time.df$age_at_ade_date1 <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'age_at_ade_date1']
time.df$age_at_ade_date1 <- ifelse(!is.na(time.df$age_at_ade_date1) & 
                                   time.df$age_at_ade_date1 > time.df$last_age,NA,
                                   time.df$age_at_ade_date1)
time.df$month_at_ade_date1 <- ceiling((time.df$age_at_ade_date1 - time.df$age_at_t0)*12+1)
time.df$ade1 <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'ade1']
time.df$ade1 <- ifelse(is.na(time.df$age_at_ade_date1),NA,time.df$ade1)

time.df$age_at_ade_date2 <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'age_at_ade_date2']
time.df$age_at_ade_date2 <- ifelse(!is.na(time.df$age_at_ade_date2) & 
                                   time.df$age_at_ade_date2 > time.df$last_age,NA,
                                   time.df$age_at_ade_date2)
time.df$month_at_ade_date2 <- ceiling((time.df$age_at_ade_date2 - time.df$age_at_t0)*12+1)
time.df$ade2 <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'ade2']
time.df$ade2 <- ifelse(is.na(time.df$age_at_ade_date2),NA,time.df$ade2)

time.df$age_at_ade_date3 <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'age_at_ade_date3']
time.df$age_at_ade_date3 <- ifelse(!is.na(time.df$age_at_ade_date3) & 
                                   time.df$age_at_ade_date3 > time.df$last_age,NA,
                                   time.df$age_at_ade_date3)
time.df$month_at_ade_date3 <- ceiling((time.df$age_at_ade_date3 - time.df$age_at_t0)*12+1)
time.df$ade3 <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'ade3']
time.df$ade3 <- ifelse(is.na(time.df$age_at_ade_date3),NA,time.df$ade3)


time.df$age_at_ade_date4 <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'age_at_ade_date4']
time.df$age_at_ade_date4 <- ifelse(!is.na(time.df$age_at_ade_date1) & 
                                   time.df$age_at_ade_date4 > time.df$last_age,NA,
                                   time.df$age_at_ade_date4)
time.df$month_at_ade_date4 <- ceiling((time.df$age_at_ade_date4 - time.df$age_at_t0)*12+1)
time.df$ade4 <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'ade4']
time.df$ade4 <- ifelse(is.na(time.df$age_at_ade_date4),NA,time.df$ade4)


time.df$ade_indicator <- ifelse(time.df$month_at_ade_date1 == time.df$month & !is.na(time.df$month_at_ade_date1),1,
                                ifelse(time.df$month_at_ade_date2 == time.df$month & !is.na(time.df$month_at_ade_date2),1,
                                       ifelse(time.df$month_at_ade_date3 == time.df$month & !is.na(time.df$month_at_ade_date3),1,
                                              ifelse(time.df$month_at_ade_date4 == time.df$month & !is.na(time.df$month_at_ade_date4),1,0))))


# Cumulative HC use
# Need to split up by HCP use to get cumulative sum just within that episode of HCP use first.
time.df$hcp_number <- NA
split(time.df$hcp_number, time.df$cfar_pid) <- lapply(split(subset(time.df,select=c(hcp_date_type_modified,cfar_pid)),
                                                                time.df$cfar_pid),
                                                          function(x){
                                                             cumsum(x[,1] %in% c('START','END,START','BOTH'))
                                                             })
time.df$hcp_number <- ifelse(time.df$hcp_ind == 0,NA,time.df$hcp_number)
time.df$pid.hcp <- paste(time.df$cfar_pid, time.df$hcp_number, sep='.')

sub_time.df <- subset(time.df, hcp_ind == 1, select=c(cfar_pid,age, pid.hcp))
sub_time.df$diff_by_hcp <- NA
split(sub_time.df$diff_by_hcp, sub_time.df$pid.hcp) <- lapply(split(sub_time.df$age, sub_time.df$pid.hcp), 
                                                           FUN=function(y){
                                                             if(length(unlist(y)) == 1){
                                                                return(1/12)
                                                             } else {
                                                                return(c(diff(unlist(y)),0))
                                                             return(c(diff(unlist(y)),1/12))
                                                             } })
sub_time.df$pid.age <- paste(sub_time.df$cfar_pid, sub_time.df$age,sep='.')
time.df$pid.age <- paste(time.df$cfar_pid, time.df$age, sep='.')
time.df$diff_by_hcp <- sub_time.df[match(time.df$pid.age,sub_time.df$pid.age,nomatch=NA),'diff_by_hcp']


sub_time.df$diff_by_hcp_modified <- NA
split(sub_time.df$diff_by_hcp_modified, sub_time.df$pid.hcp) <- lapply(split(sub_time.df$age, sub_time.df$pid.hcp),
                                                                  FUN=function(y){
                                                                     if(length(unlist(y)) == 1){
                                                                        return(1/12)
                                                                     } else {
                                                                        return(c(0,diff(unlist(y))))
                                                                     } })
time.df$diff_by_hcp_modified <- sub_time.df[match(time.df$pid.age, sub_time.df$pid.age,nomatch=NA),'diff_by_hcp_modified']

split(time.df$diff_by_hcp_modified, time.df$cfar_pid) <- lapply(split(subset(time.df, select=c(cfar_pid,diff_by_hcp_modified,hcp_ind)),time.df$cfar_pid),FUN=function(y){
                                                            new_diff <- y[,'diff_by_hcp_modified']
                                                            if(nrow(y) > 1){
                                                               for(i in 2:nrow(y)){
                                                                  if(y[(i-1),'hcp_ind'] == 1 & y[i,'hcp_ind'] == 0){
                                                                        new_diff[i] <- 1/12
                                                                  }
                                                               }
                                                            } 
                                                            return(new_diff)
                                                            })


sub_time.df$diff_by_hcp_modified <- time.df[match(sub_time.df$pid.age, time.df$pid.age,nomatch=NA),'diff_by_hcp_modified']


# Then will sum up the individual cumulative sums to get an overall sum
time.df$cum_hcp <- NA
sub_time.df$cum_hcp <- NA
split(sub_time.df$cum_hcp, sub_time.df$cfar_pid) <- lapply(split(sub_time.df$diff_by_hcp, sub_time.df$cfar_pid),
                                                           FUN=function(y){
                                                              c(0,cumsum(y[-length(unlist(y))]))})
time.df$cum_hcp <- sub_time.df[match(time.df$pid.age, sub_time.df$pid.age, nomatch=NA),'cum_hcp']


# Then will sum up the individual cumulative sums to get an overall sum
time.df$cum_hcp_modified <- NA
sub_time.df$cum_hcp_modified <- NA
split(time.df$cum_hcp_modified, time.df$cfar_pid) <- lapply(split(time.df$diff_by_hcp_modified, time.df$cfar_pid),
                                                           FUN=function(y){
                                                           which.na <- which(is.na(y))
                                                           y[which.na] <- 0
                                                              c(cumsum(y))})
sub_time.df$cum_hcp_modified <- time.df[match(sub_time.df$pid.age, time.df$pid.age, nomatch=NA),'cum_hcp_modified']


# Then need to fill in missing values with last observation carried forward
# Filling in CD4 counts
time.df$new_cum_hcp <- unsplit(lapply(split(time.df$cum_hcp,time.df$cfar_pid),
                                      function(x){L <- !is.na(x);x[c(NA,which(L))[cumsum(L)+1]]}),time.df$cfar_pid)
time.df$new_cum_hcp <- ifelse(is.na(time.df$new_cum_hcp),0,time.df$new_cum_hcp)

# Also need to include a lagged hcp_indicator for weighting model -- probability of being on HC this month depends on 
# HC status last month
lag_hcp_indicator <- lapply(split(time.df$hcp_ind, time.df$cfar_pid),FUN=function(y){
                             c(0,y[-length(y)])})
time.df$lag_hcp_indicator <- unlist(lag_hcp_indicator)

# Finally, need logged baseline viral load
time.df$log10_baseline_vl <- log10(time.df$baseline_vl_b)


# Need to have a cumulative time not on HC for calculating incidence rates
time.df$diff_time <- NA
split(time.df$diff_time,time.df$cfar_pid) <- lapply(split(subset(time.df, select=c(age,hcp_ind)), time.df$cfar_pid),FUN=function(y){
                                                tmp <- c(0,diff(unlist(y[,'age'])))
                                                tmp <- ifelse(y[,'hcp_ind'] == 1,0,tmp)
                                                return(tmp) } )

time.df$hcp_diff <- NA
split(time.df$hcp_diff, time.df$cfar_pid) <- lapply(split(time.df$hcp_ind, time.df$cfar_pid),FUN=function(y){
                                                tmp <- c(0,diff(unlist(y)))
                                                return(tmp) })

time.df$diff_time <- ifelse(time.df$hcp_diff == -1,0.0,time.df$diff_time)

time.df$cum_no_hcp <- NA
split(time.df$cum_no_hcp, time.df$cfar_pid) <- lapply(split(time.df$diff_time, time.df$cfar_pid),FUN=function(y){
                                                  tmp <- cumsum(y)
                                                  return(tmp) })


# Need to have a cumulative time not on HC for calculating incidence rates
time.df$diff_time_modified <- NA
split(time.df$diff_time_modified,time.df$cfar_pid) <- lapply(split(subset(time.df, select=c(age,hcp_ind)), time.df$cfar_pid),FUN=function(y){
                                                tmp <- c(0,diff(unlist(y[,'age'])))
                                                tmp <- ifelse(y[,'hcp_ind'] == 1,0,tmp)
                                                if(nrow(y) > 1){
                                                   for(i in 2:nrow(y)){
                                                      if(y[(i-1),'hcp_ind'] == 0 & y[i,'hcp_ind'] == 1){
                                                         tmp[i] <- 1/12
                                                      }
                                                      if(y[(i-1),'hcp_ind'] == 1 & y[i,'hcp_ind'] == 0){
                                                         tmp[i] <- 0
                                                      }
                                                   }
                                                }
                                                return(tmp) } )


time.df$cum_no_hcp_modified <- NA
split(time.df$cum_no_hcp_modified, time.df$cfar_pid) <- lapply(split(time.df$diff_time_modified, time.df$cfar_pid),FUN=function(y){
                                                  tmp <- cumsum(y)
                                                  return(tmp) })

# Calculating person time for different HC types (Oral versus DMPA)
time.df$hcp_number_depo <- NA
time.df$hcp_number_other <- NA
split(time.df$hcp_number_depo, time.df$cfar_pid) <- lapply(split(subset(time.df, select=c(hcp_date_type_modified,
                                                                                   hcp_title_depo,cfar_pid)),
                                                           time.df$cfar_pid),FUN=function(y){
                                                              cumsum(y[,1] %in% c('START','END,START','BOTH') &
                                                                     y[,2] == 'Depo-Provera' &
                                                                     !is.na(y[,2]))})
split(time.df$hcp_number_other, time.df$cfar_pid) <- lapply(split(subset(time.df, select=c(hcp_date_type_modified,
                                                                                          hcp_other_category,cfar_pid)),
                                                                 time.df$cfar_pid),FUN=function(y){
                                                                    cumsum(y[,1] %in% c('START','END,START','BOTH') &
                                                                           y[,2] == 'Other' & !is.na(y[,2]))})
time.df$hcp_number_depo <- ifelse(time.df$hcp_ind == 0,NA,time.df$hcp_number_depo)
time.df$hcp_number_other <- ifelse(time.df$hcp_ind == 0,NA,time.df$hcp_number_other)

time.df$pid.hcp.depo <- paste(time.df$cfar_pid, time.df$hcp_number_depo, sep='.')
time.df$pid.hcp.other <- paste(time.df$cfar_pid, time.df$hcp_number_other, sep='.')

# Depo
sub_time.df <- subset(time.df, hcp_ind == 1 & hcp_title_depo == 'Depo-Provera', select=c(cfar_pid,age, pid.hcp.depo))
sub_time.df$diff_by_depo <- NA
split(sub_time.df$diff_by_depo, sub_time.df$pid.hcp.depo) <- lapply(split(sub_time.df$age, sub_time.df$pid.hcp.depo),
                                                           FUN=function(y){
                                                              if(length(unlist(y)) == 1){
                                                                 return(1/12)
                                                              } else {
                                                                 return(c(diff(unlist(y)),0))
                                                              } })
sub_time.df$pid.age <- paste(sub_time.df$cfar_pid, sub_time.df$age,sep='.')
time.df$pid.age <- paste(time.df$cfar_pid, time.df$age, sep='.')
time.df$diff_by_depo <- sub_time.df[match(time.df$pid.age,sub_time.df$pid.age,nomatch=NA),'diff_by_depo']


time.df$diff_by_depo <- ifelse(is.na(time.df$diff_by_depo),0,time.df$diff_by_depo)
sub_time.df$diff_by_depo <- ifelse(is.na(sub_time.df$diff_by_depo),0,sub_time.df$diff_by_depo)


# Then will sum up the individual cumulative sums to get an overall sum
time.df$cum_depo <- NA
# sub_time.df$cum_depo <- NA

split(time.df$cum_depo, time.df$cfar_pid) <- lapply(split(time.df$diff_by_depo, time.df$cfar_pid),
                                                           FUN=function(y){
                                                              c(0,cumsum(y[-length(y)]))})


# Then need to fill in missing values with last observation carried forward
# Filling in CD4 counts
time.df$new_cum_depo <- unsplit(lapply(split(time.df$cum_depo,time.df$cfar_pid),
                                      function(x){L <- !is.na(x);x[c(NA,which(L))[cumsum(L)+1]]}),time.df$cfar_pid)
time.df$new_cum_depo <- ifelse(is.na(time.df$new_cum_depo),0,time.df$new_cum_depo)


# Other
sub_time.df <- subset(time.df, hcp_ind == 1 & hcp_other_category == 'Other', select=c(cfar_pid,age, pid.hcp.other))
sub_time.df$diff_by_other <- NA
split(sub_time.df$diff_by_other, sub_time.df$pid.hcp.other) <- lapply(split(sub_time.df$age, sub_time.df$pid.hcp.other),
                                                           FUN=function(y){
                                                              if(length(unlist(y)) == 1){
                                                                 return(1/12)
                                                              } else {
                                                                 return(c(diff(unlist(y)),0))
                                                              } })

sub_time.df$pid.age <- paste(sub_time.df$cfar_pid, sub_time.df$age,sep='.')
time.df$pid.age <- paste(time.df$cfar_pid, time.df$age, sep='.')
time.df$diff_by_other <- sub_time.df[match(time.df$pid.age,sub_time.df$pid.age,nomatch=NA),'diff_by_other']


time.df$diff_by_other <- ifelse(is.na(time.df$diff_by_other),0,time.df$diff_by_other)
sub_time.df$diff_by_other <- ifelse(is.na(sub_time.df$diff_by_other),0,sub_time.df$diff_by_other)


# Then will sum up the individual cumulative sums to get an overall sum
time.df$cum_other <- NA
sub_time.df$cum_other <- NA
split(time.df$cum_other, time.df$cfar_pid) <- lapply(split(time.df$diff_by_other, time.df$cfar_pid),
                                                           FUN=function(y){
                                                              c(0,cumsum(unlist(y[-length(unlist(y))])))})


# Then need to fill in missing values with last observation carried forward
# Filling in CD4 counts
time.df$new_cum_other <- unsplit(lapply(split(time.df$cum_other,time.df$cfar_pid),
                                      function(x){L <- !is.na(x);x[c(NA,which(L))[cumsum(L)+1]]}),time.df$cfar_pid)
time.df$new_cum_other <- ifelse(is.na(time.df$new_cum_other),0,time.df$new_cum_other)


# Finding the max new_cum_depo and max of new_cum_oral per person
time.df$max_cum_depo <- NA
split(time.df$max_cum_depo, time.df$cfar_pid) <- lapply(split(time.df$cum_depo, time.df$cfar_pid),
                                                        FUN=function(y){
                                                           max(y,na.rm=TRUE)})
time.df$max_cum_other <- NA
split(time.df$max_cum_other, time.df$cfar_pid) <- lapply(split(time.df$cum_other, time.df$cfar_pid),
                                                        FUN=function(y){
                                                           max(y,na.rm=TRUE)})

# Finding total person time for each type of HC (Depo vs oral)
total_depo_time <- sum(time.df$max_cum_depo[!duplicated(time.df$cfar_pid)],na.rm=TRUE)

total_other_time <- sum(time.df$max_cum_other[!duplicated(time.df$cfar_pid)],na.rm=TRUE)


# Table 1 summaries
# Need to create a variable for number of months followed, maximum cumulative HC use per person, ADE indicator (max of ade_indicator),
# and indicator for who was ever on HAART and ART during follow-up
time.df$max_number_months_followed <- NA
split(time.df$max_number_months_followed, time.df$cfar_pid) <- lapply(split(time.df$month, time.df$cfar_pid), FUN=function(y){
                                                                        max(y)})
time.df$max_cum_HC_use <- NA
split(time.df$max_cum_HC_use, time.df$cfar_pid) <- lapply(split(time.df$new_cum_hcp, time.df$cfar_pid), FUN=function(y){
                                                            max(y)})

time.df$any_ade_during_followup <- NA
split(time.df$any_ade_during_followup, time.df$cfar_pid) <- lapply(split(time.df$ade_indicator, time.df$cfar_pid), FUN=function(y){
                                                                     max(y)})
time.df$any_ade_during_followup.factor <- factor(time.df$any_ade_during_followup, levels=c(0,1), labels=c('No','Yes'))
time.df$any_haart_during_followup <- NA
split(time.df$any_haart_during_followup, time.df$cfar_pid) <- lapply(split(time.df$haart_ind, time.df$cfar_pid), FUN=function(y){
                                                                      max(y)})
time.df$any_haart_during_followup.factor <- factor(time.df$any_haart_during_followup, levels=c(0,1), labels=c('No','Yes'))

time.df$any_art_during_followup <- NA
split(time.df$any_art_during_followup, time.df$cfar_pid) <- lapply(split(time.df$reg_indicator, time.df$cfar_pid), FUN=function(y){
                                                                    max(y)})
time.df$any_art_during_followup.factor <- factor(time.df$any_art_during_followup, levels=c(0,1), labels=c('No','Yes'))
time.df$ade_indicator.factor <- factor(time.df$ade_indicator, levels=c(0,1), labels=c('No','Yes'))
time.df$prior_hcp_t0.factor <- factor(time.df$prior_hcp_t0, levels=c(0,1), labels=c('No','Yes'))
time.df$prior_hcv_t0.factor <- factor(time.df$prior_hcv_t0, levels=c(0,1), labels=c('No','Yes'))
any_hc_use <- tapply(time.df$hcp_ind, INDEX=time.df$cfar_pid, FUN=max)
any_hc_use.df <- data.frame('cfar_pid'=names(any_hc_use),
                            'any_hc_use'=any_hc_use)
time.df$any_hc_use <- any_hc_use.df[match(time.df$cfar_pid, any_hc_use.df$cfar_pid,nomatch=NA),'any_hc_use']
time.df$any_hc_use.factor <- factor(time.df$any_hc_use, levels=c(0,1), labels=c('No HC use','HC use'))


# Need to include distribution of person time for the different types of HC in Table 1
time.df$prior_non_ade.factor <- factor(time.df$prior_non_ade, levels=c(0,1),labels=c('No','Yes'))
time.df$prior_mh_non_ade_no_sa.factor <- factor(time.df$prior_mh_non_ade_no_sa, levels=c(0,1),labels=c('No','Yes'))
time.df$prior_mh_non_ade.factor <- factor(time.df$prior_mh_non_ade, levels=c(0,1),labels=c('No','Yes'))
time.df$prior_mh_non_ade_only.factor <- factor(time.df$prior_mh_non_ade_only, levels=c(0,1),labels=c('No','Yes'))


tbl1.summ <- summary(any_hc_use.factor ~ race_2level.factor + route_of_infection_combined.factor + age_at_t0 + max_number_months_followed +
                        max_cum_HC_use + year_of_t0 + baseline_cd4_count_b + log10_baseline_vl + prior_hcv_t0.factor +
                        any_ade_during_followup.factor + prior_hcp_t0.factor + prior_non_ade.factor + prior_mh_non_ade_no_sa.factor +
                        prior_mh_non_ade.factor + prior_mh_non_ade_only.factor + any_haart_during_followup.factor +
                        any_art_during_followup.factor + max_cum_depo + max_cum_other, data=time.df,digits=2,
                     subset=!duplicated(cfar_pid),method='reverse',overall=TRUE,test=TRUE)
tbl1.summ$labels <- c('Race','Route of infection','Baseline age','Months of follow-up','HC use per person (years)',
                      'Year of first clinic visit','Baseline CD4','Baseline HIV1-RNA (log-transformed)',
                      'HCV prior to first clinic visit','ADE during followup','HC use prior to first clinic visit',
                      'Prior NADE','Prior NADE (excluding substance abuse)','Prior NADE (including substance abuse)',
                      'Prior mental health NADE','Any HAART during followup','Any ART during followup','DMPA HC use per person (years)',
                      'Non-DMPA HC use per person (years)')

tmp <- subset(time.df, any_hc_use == 1)
tmp$hcp_title_depo <- factor(tmp$hcp_title_depo)
tmp$hcp_title_other <- factor(tmp$hcp_title_other)
tmp$all_patients <- 1
tmp.summ <- summary(all_patients ~ tmp$hcp_title_depo + tmp$hcp_title_other, data=tmp, method='reverse')

tmp.summ$labels <- c('Summary of Depo use','Summary of non-Depo use')


# Function to run weighting and marginal structural models
by_outcomes <- function(max_month, out, nade_var, time.df){
   df <- subset(time.df, month <= max_month)
   df <- time.df[which(time.df$month <= time.df[[max_month]]),]
   fmla.denom <- as.formula(paste('hcp_ind.factor ~ age_at_t0 + rcs(month,4) + race_2level.factor + rcs(cd4_count_b,3) +
                                       (1-undetectable_vl)*log10_hiv1_rna_b + haart_ind + reg_indicator + route_of_infection_combined.factor +
                                       prior_hcv_t0.factor + year_of_t0 + ade_indicator + prior_hcp_t0 + ',nade_var,sep=''))
   wt.mod.denom <- glm(fmla.denom,data=df, subset=pregnancy_indicator == 0, family='binomial')
   denom <- exp(predict(wt.mod.denom))/(1+exp(predict(wt.mod.denom)))

   #plus time-invariant covariates from above
   fmla.num <- as.formula(paste('hcp_ind.factor ~ rcs(month,4) + age_at_t0 + race_2level.factor + route_of_infection_combined.factor +
                                            prior_hcv_t0.factor + year_of_t0 + prior_hcp_t0 + ',nade_var,sep=''))
   wt.mod.num <- glm(fmla.num,data=df, subset=pregnancy_indicator == 0 & !is.na(cd4_count_b) & !is.na(log10_hiv1_rna_b),family='binomial')
   num <- exp(predict(wt.mod.num))/(1+exp(predict(wt.mod.num)))

   wt <- ifelse(df$hcp_ind[which(df$pregnancy_indicator == 0 & !is.na(df$cd4_count_b) & !is.na(df$log10_hiv1_rna_b))] == 1,
                num/denom, (1-num)/(1-denom))

   quant.wt <- quantile(wt, probs=c(0.025,0.975))
   wt <- ifelse(wt < quant.wt[1], quant.wt[1],
                ifelse(wt > quant.wt[2],quant.wt[2],wt))

   outcome <- eval(parse(text=paste('df$',out,sep='')))[which(df$pregnancy_indicator == 0 &
                                                              !is.na(df$cd4_count_b) &
                                                              !is.na(df$log10_hiv1_rna_b))]
   df$outcome <- NA
   df$outcome[which(df$pregnancy_indicator == 0)] <- outcome
   df$wt <- NA
   df$wt[which(df$pregnancy_indicator == 0)] <- wt
   fmla.main <- as.formula(paste('outcome ~ rcs(age_at_t0,3) + month + race_2level.factor + route_of_infection_combined.factor + hcp_ind.factor +
                        new_cum_hcp + prior_hcv_t0.factor + year_of_t0 + baseline_cd4_count_b +
                        log10_baseline_vl + prior_hcp_t0 + ',nade_var,sep=''))
   msm <- glm(fmla.main, data=df, weights=wt,family='quasibinomial',subset=pregnancy_indicator == 0)
   outcome2 <- eval(parse(text=paste('df$',out,sep='')))[which(df$pregnancy_indicator == 0 &
                                                              !is.na(df$cd4_count_b) &
                                                              !is.na(df$log10_hiv1_rna_b))]
   df$outcome2 <- NA
   df$outcome2[which(df$pregnancy_indicator == 0 & !is.na(df$cd4_count_b) & !is.na(df$log10_hiv1_rna_b))] <- outcome2
   df$outcome2 <- factor(df$outcome2)

   dd <<- datadist(df)
   options(datadist='dd')

   fmla.main2 <- as.formula(paste('outcome2 ~ rcs(age_at_t0,3) + month + race_2level.factor + route_of_infection_combined.factor +
                                                    hcp_ind.factor +
                                                   new_cum_hcp + prior_hcv_t0.factor +
                                                   year_of_t0 +
                                                   + baseline_cd4_count_b +
                                                   log10_baseline_vl + prior_hcp_t0 + ',nade_var,sep=''))
   msm2 <- lrm(fmla.main2, data=df, weights=wt, subset=pregnancy_indicator == 0,tol=-10)
   fmla.main3 <- as.formula(paste('outcome2 ~ rcs(age_at_t0,3) + month + race_2level.factor +
                                                    hcp_ind.factor +
                                                   new_cum_hcp +
                                                   prior_hcv_t0.factor +
                                                   route_of_infection_combined.factor +
                                                   + baseline_cd4_count_b +
                                                   log10_baseline_vl + prior_hcp_t0 + ',nade_var,sep=''))
   msm2b <- lrm(fmla.main3, data=df, weights=wt, subset=pregnancy_indicator == 0,tol=-10)


   return(list(denom=denom,
               num=num,
               wt=wt,
               msm=msm,
               msm2=msm2,
               msm2b=msm2b))
}

time.df <- upData(time.df, drop='month_of_nade')
time.df$prior_hcv_t0.factor <- factor(time.df$prior_hcv_t0, levels=c(0,1), labels = c('No','Yes'))
time.df$hcp_ind.factor <- factor(time.df$hcp_ind, levels=c(0,1), labels = c('No','Yes'))

nade <- by_outcomes(max_month = 'max_month_for_nade_outcome', out = 'nade_outcome_indicator', nade_var = 'prior_non_ade.factor',time.df)

nade.df <- subset(time.df, month <= max_month_for_nade_outcome)

dd <- datadist(nade.df)
options(datadist='dd')
j <- nade[['msm2']]
nade.summ <- summary(j, race_2level.factor = 'White',
                     route_of_infection_combined.factor = 'Non-IDU',
                     prior_hcv_t0.factor = 'No',hcp_ind.factor = 'No',
                     year_of_t0 = c(2000, 2003),month = c(10,16),
                     age_at_t0 = c(35,30),new_cum_hcp = c(0,1),baseline_cd4_count_b = c(500,600),
                     log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_non_ade.factor = 'No')
nade.summ2 <- summary(j, race_2level.factor = 'White',
                     route_of_infection_combined.factor = 'Non-IDU',
                     prior_hcv_t0.factor = 'No',hcp_ind.factor = 'No',
                     year_of_t0 = c(2000, 2003),month = c(10,16),
                     age_at_t0 = c(35,40),new_cum_hcp = c(0,1),baseline_cd4_count_b = c(500,600),
                     log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_non_ade.factor = 'No')
nade.summ3 <- summary(j, race_2level.factor = 'White',
                     route_of_infection_combined.factor = 'Non-IDU',
                     prior_hcv_t0.factor = 'No',hcp_ind.factor = 'No',
                     year_of_t0 = c(2000, 2003),month = c(10,16),
                     age_at_t0 = c(35,50),new_cum_hcp = c(0,1),baseline_cd4_count_b = c(500,600),
                     log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_non_ade.factor = 'No')

order.for.summ <- c(which(rownames(nade.summ) == 'age_at_t0'),
                    which(rownames(nade.summ) == 'month'),
                    which(rownames(nade.summ) == 'new_cum_hcp'),
                    which(rownames(nade.summ) == 'year_of_t0'),
                    which(rownames(nade.summ) == 'baseline_cd4_count_b'),
                    which(rownames(nade.summ) == 'log10_baseline_vl'),
                    which(rownames(nade.summ) == 'prior_hcp_t0'),
                    which(rownames(nade.summ) == 'prior_non_ade.factor - Yes:No'),
                    which(rownames(nade.summ) == 'race_2level.factor - Non-white:White'),
                    which(rownames(nade.summ) == 'route_of_infection_combined.factor - IDU:Non-IDU'),
                    which(rownames(nade.summ) == 'hcp_ind.factor - Yes:No'),
                    which(rownames(nade.summ) == 'prior_hcv_t0.factor - Yes:No'))
order.for.summ <- order.for.summ + 1
nade_or <- format(round(nade.summ[order.for.summ,'Effect'],2),nsmall=2)
nade_or2 <- format(round(nade.summ2[2,'Effect'],2),nsmall=2)
nade_or3 <- format(round(nade.summ3[2,'Effect'],2),nsmall=2)

nade_ci <- paste('(',format(round(nade.summ[order.for.summ,'Lower 0.95'],2),nsmall=2),
                 ', ',format(round(nade.summ[order.for.summ,'Upper 0.95'],2),nsmall=2),')',sep='')

nade_ci2 <- paste('(', format(round(nade.summ2[2,'Lower 0.95'],2),nsmall=2),
                 ', ',format(round(nade.summ2[2,'Upper 0.95'],2),nsmall=2),
                 ')',sep='')
nade_ci3 <- paste('(', format(round(nade.summ3[2,'Lower 0.95'],2),nsmall=2),
                 ', ',format(round(nade.summ3[2,'Upper 0.95'],2),nsmall=2),
                 ')',sep='')

rownames.anova <- rownames(anova(nade[['msm2']]))

order.for.anova <- c(which(rownames.anova == 'age_at_t0'),
                     which(rownames.anova == 'month'),
                     which(rownames.anova == 'new_cum_hcp'),
                     which(rownames.anova == 'year_of_t0'),
                     which(rownames.anova == 'baseline_cd4_count_b'),
                     which(rownames.anova == 'log10_baseline_vl'),
                     which(rownames.anova == 'prior_hcp_t0'),
                     which(rownames.anova == 'prior_non_ade.factor'),
                     which(rownames.anova == 'race_2level.factor'),
                     which(rownames.anova == 'route_of_infection_combined.factor'),
                     which(rownames.anova == 'hcp_ind.factor'),
                     which(rownames.anova == 'prior_hcv_t0.factor'))
nade_p <- c(anova(nade[['msm2']])[order.for.anova,'P'])
nade_p <- ifelse(nade_p < 0.001, '< 0.001', ifelse(nade_p >= 0.001 & nade_p < 0.01, format(round(nade_p,3),nsmall=2),
                                                   ifelse(nade_p >= 0.01, format(round(nade_p,2),nsmall=2),'')))
nade_p <- ifelse(is.na(nade_p),'', nade_p)

time.df <- upData(time.df, drop='month_of_mh_nade')
mh_nade.mod <- by_outcomes(max_month = 'max_month_for_mh_nade_outcome', out = 'mh_nade_outcome_indicator',
                       nade_var = 'prior_mh_non_ade.factor', time.df)
mh_nade.df <- subset(time.df, month <= max_month_for_mh_nade_outcome)

dd <- datadist(mh_nade.df)
options(datadist = 'dd')
j <- mh_nade.mod[['msm2']]

mh_nade.summ <- summary(j, race_2level.factor = 'White',
                     route_of_infection_combined = 'Non-IDU',
                     prior_hcv_t0.factor = 'No',hcp_ind.factor = 'No',
                     year_of_t0 = c(2000, 2003),month = c(10,16),
                     age_at_t0 = c(35,30),new_cum_hcp = c(0,1),baseline_cd4_count_b = c(500,600),
                     log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade.factor = 'No')
mh_nade.summ2 <- summary(j, race_2level.factor = 'White',
                     route_of_infection_combined = 'Non-IDU',
                     prior_hcv_t0.factor = 'No',hcp_ind.factor = 'No',
                     year_of_t0 = c(2000, 2003),month = c(10,16),
                     age_at_t0 = c(35,40),new_cum_hcp = c(0,1),baseline_cd4_count_b = c(500,600),
                     log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade.factor = 'No')
mh_nade.summ3 <- summary(j, race_2level.factor = 'White',
                     route_of_infection_combined = 'Non-IDU',
                     prior_hcv_t0.factor = 'No',hcp_ind.factor = 'No',
                     year_of_t0 = c(2000, 2003),month = c(10,16),
                     age_at_t0 = c(35,50),new_cum_hcp = c(0,1),baseline_cd4_count_b = c(500,600),
                     log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade.factor = 'No')

order.for.summ <- c(which(rownames(mh_nade.summ) == 'age_at_t0'),
                    which(rownames(mh_nade.summ) == 'month'),
                    which(rownames(mh_nade.summ) == 'new_cum_hcp'),
                    which(rownames(mh_nade.summ) == 'year_of_t0'),
                    which(rownames(mh_nade.summ) == 'baseline_cd4_count_b'),
                    which(rownames(mh_nade.summ) == 'log10_baseline_vl'),
                    which(rownames(mh_nade.summ) == 'prior_hcp_t0'),
                    which(rownames(mh_nade.summ) == 'prior_non_ade.factor - Yes:No'),
                    which(rownames(mh_nade.summ) == 'race_2level.factor - Non-white:White'),
                    which(rownames(mh_nade.summ) == 'route_of_infection_combined.factor - IDU:Non-IDU'),
                    which(rownames(mh_nade.summ) == 'hcp_ind.factor - Yes:No'),
                    which(rownames(mh_nade.summ) == 'prior_hcv_t0.factor - Yes:No'))
order.for.summ <- order.for.summ + 1
mh_nade_or <- format(round(mh_nade.summ[order.for.summ,'Effect'],2),nsmall=2)
mh_nade_or2 <- format(round(mh_nade.summ2[2,'Effect'],2),nsmall=2)
mh_nade_or3 <- format(round(mh_nade.summ3[2,'Effect'],2),nsmall=2)


mh_nade_ci <- paste('(',format(round(mh_nade.summ[order.for.summ,'Lower 0.95'],2),nsmall=2),
                 ', ',format(round(mh_nade.summ[order.for.summ,'Upper 0.95'],2),nsmall=2),')',sep='')
mh_nade_ci2 <- paste('(', format(round(mh_nade.summ2[2,'Lower 0.95'],2),nsmall=2),
                 ', ',format(round(mh_nade.summ2[2,'Upper 0.95'],2),nsmall=2),
                 ')',sep='')
mh_nade_ci3 <- paste('(', format(round(mh_nade.summ3[2,'Lower 0.95'],2),nsmall=2),
                 ', ',format(round(mh_nade.summ3[2,'Upper 0.95'],2),nsmall=2),
                 ')',sep='')

rownames.anova <- rownames(anova(mh_nade.mod[['msm2']]))

order.for.anova <- c(which(rownames.anova == 'age_at_t0'),
                     which(rownames.anova == 'month'),
                     which(rownames.anova == 'new_cum_hcp'),
                     which(rownames.anova == 'year_of_t0'),
                     which(rownames.anova == 'baseline_cd4_count_b'),
                     which(rownames.anova == 'log10_baseline_vl'),
                     which(rownames.anova == 'prior_hcp_t0'),
                     which(rownames.anova == 'prior_mh_non_ade.factor'),
                     which(rownames.anova == 'race_2level.factor'),
                     which(rownames.anova == 'route_of_infection_combined.factor'),
                     which(rownames.anova == 'hcp_ind.factor'),
                     which(rownames.anova == 'prior_hcv_t0.factor'))
mh_nade_p <- c(anova(mh_nade.mod[['msm2']])[order.for.anova,'P'])

mh_nade_p <- ifelse(mh_nade_p < 0.001, '< 0.001', ifelse(mh_nade_p >= 0.001 & mh_nade_p < 0.01, format(round(mh_nade_p,3),nsmall=2),
                                                   ifelse(mh_nade_p >= 0.01, format(round(mh_nade_p,2),nsmall=2),'')))
mh_nade_p <- ifelse(is.na(mh_nade_p),'', mh_nade_p)


mh_nade_no_sa.mod <- by_outcomes(max_month = 'max_month_for_mh_nade_no_sa_outcome', out = 'mh_nade_no_sa_outcome_indicator',
                       nade_var = 'prior_mh_non_ade_no_sa.factor', time.df)
mh_nade_no_sa.df <- subset(time.df, month <= max_month_for_mh_nade_no_sa_outcome)

dd <- datadist(mh_nade_no_sa.df)
options(datadist = 'dd')
j <- mh_nade_no_sa.mod[['msm2']]

mh_nade_no_sa.summ <- summary(j, race_2level.factor = 'White',
                     route_of_infection_combined = 'Non-IDU',
                     prior_hcv_t0.factor = 'No',hcp_ind.factor = 'No',
                     year_of_t0 = c(2000, 2003),month = c(10,16),
                     age_at_t0 = c(35,30),new_cum_hcp = c(0,1),baseline_cd4_count_b = c(500,600),
                     log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade_no_sa.factor = 'No')
mh_nade_no_sa.summ2 <- summary(j, race_2level.factor = 'White',
                     route_of_infection_combined = 'Non-IDU',
                     prior_hcv_t0.factor = 'No',hcp_ind.factor = 'No',
                     year_of_t0 = c(2000, 2003),month = c(10,16),
                     age_at_t0 = c(35,40),new_cum_hcp = c(0,1),baseline_cd4_count_b = c(500,600),
                     log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade_no_sa.factor = 'No')
mh_nade_no_sa.summ3 <- summary(j, race_2level.factor = 'White',
                     route_of_infection_combined = 'Non-IDU',
                     prior_hcv_t0.factor = 'No',hcp_ind.factor = 'No',
                     year_of_t0 = c(2000, 2003),month = c(10,16),
                     age_at_t0 = c(35,50),new_cum_hcp = c(0,1),baseline_cd4_count_b = c(500,600),
                     log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade_no_sa.factor = 'No')


order.for.summ <- c(which(rownames(mh_nade_no_sa.summ) == 'age_at_t0'),
                    which(rownames(mh_nade_no_sa.summ) == 'month'),
                    which(rownames(mh_nade_no_sa.summ) == 'new_cum_hcp'),
                    which(rownames(mh_nade_no_sa.summ) == 'year_of_t0'),
                    which(rownames(mh_nade_no_sa.summ) == 'baseline_cd4_count_b'),
                    which(rownames(mh_nade_no_sa.summ) == 'log10_baseline_vl'),
                    which(rownames(mh_nade_no_sa.summ) == 'prior_hcp_t0'),
                    which(rownames(mh_nade_no_sa.summ) == 'prior_mh_non_ade_no_sa.factor - Yes:No'),
                    which(rownames(mh_nade_no_sa.summ) == 'race_2level.factor - Non-white:White'),
                    which(rownames(mh_nade_no_sa.summ) == 'route_of_infection_combined.factor - IDU:Non-IDU'),
                    which(rownames(mh_nade_no_sa.summ) == 'hcp_ind.factor - Yes:No'),
                    which(rownames(mh_nade_no_sa.summ) == 'prior_hcv_t0.factor - Yes:No'))
order.for.summ <- order.for.summ + 1
mh_nade_no_sa_or <- format(round(mh_nade_no_sa.summ[order.for.summ,'Effect'],2),nsmall=2)

mh_nade_no_sa_or2 <- format(round(mh_nade_no_sa.summ2[2,'Effect'],2),nsmall=2)
mh_nade_no_sa_or3 <- format(round(mh_nade_no_sa.summ3[2,'Effect'],2),nsmall=2)

mh_nade_no_sa_ci <- paste('(',format(round(mh_nade_no_sa.summ[order.for.summ,'Lower 0.95'],2),nsmall=2),
                 ', ',format(round(mh_nade_no_sa.summ[order.for.summ,'Upper 0.95'],2),nsmall=2),')',sep='')
mh_nade_no_sa_ci2 <- paste('(', format(round(mh_nade_no_sa.summ2[2,'Lower 0.95'],2),nsmall=2),
                 ', ',format(round(mh_nade_no_sa.summ2[2,'Upper 0.95'],2),nsmall=2),
                 ')',sep='')
mh_nade_no_sa_ci3 <- paste('(', format(round(mh_nade_no_sa.summ3[2,'Lower 0.95'],2),nsmall=2),
                 ', ',format(round(mh_nade_no_sa.summ3[2,'Upper 0.95'],2),nsmall=2),
                 ')',sep='')

rownames.anova <- rownames(anova(mh_nade_no_sa.mod[['msm2']]))

order.for.anova <- c(which(rownames.anova == 'age_at_t0'),
                     which(rownames.anova == 'month'),
                     which(rownames.anova == 'new_cum_hcp'),
                     which(rownames.anova == 'year_of_t0'),
                     which(rownames.anova == 'baseline_cd4_count_b'),
                     which(rownames.anova == 'log10_baseline_vl'),
                     which(rownames.anova == 'prior_hcp_t0'),
                     which(rownames.anova == 'prior_mh_non_ade_no_sa.factor'),
                     which(rownames.anova == 'race_2level.factor'),
                     which(rownames.anova == 'route_of_infection_combined.factor'),
                     which(rownames.anova == 'hcp_ind.factor'),
                     which(rownames.anova == 'prior_hcv_t0.factor'))
mh_nade_no_sa_p <- c(anova(mh_nade_no_sa.mod[['msm2']])[order.for.anova,'P'])

mh_nade_no_sa_p <- ifelse(mh_nade_no_sa_p < 0.001, '< 0.001', ifelse(mh_nade_no_sa_p >= 0.001 & mh_nade_no_sa_p < 0.01, format(round(mh_nade_no_sa_p,3),nsmall=2),
                                                   ifelse(mh_nade_no_sa_p >= 0.01, format(round(mh_nade_no_sa_p,2),nsmall=2),'')))
mh_nade_no_sa_p <- ifelse(is.na(mh_nade_no_sa_p),'', mh_nade_no_sa_p)

#---------------------------------------------#
mh_nade_only.mod <- by_outcomes(max_month = 'max_month_for_mh_nade_only_outcome', out = 'mh_nade_only_outcome_indicator',
                       nade_var = 'prior_mh_non_ade_only.factor', time.df)
mh_nade_only.df <- subset(time.df, month <= max_month_for_mh_nade_only_outcome)

dd <- datadist(mh_nade_only.df)
options(datadist = 'dd')
j <- mh_nade_only.mod[['msm2']]

mh_nade_only.summ <- summary(j, race_2level.factor = 'White',
                     route_of_infection_combined = 'Non-IDU',
                     prior_hcv_t0.factor = 'No',hcp_ind.factor = 'No',
                     year_of_t0 = c(2000, 2003),month = c(10,16),
                     age_at_t0 = c(35,30),new_cum_hcp = c(0,1),baseline_cd4_count_b = c(500,600),
                     log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade_only.factor = 'No')
mh_nade_only.summ2 <- summary(j, race_2level.factor = 'White',
                     route_of_infection_combined = 'Non-IDU',
                     prior_hcv_t0.factor = 'No',hcp_ind.factor = 'No',
                     year_of_t0 = c(2000, 2003),month = c(10,16),
                     age_at_t0 = c(35,40),new_cum_hcp = c(0,1),baseline_cd4_count_b = c(500,600),
                     log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade_only.factor = 'No')
mh_nade_only.summ3 <- summary(j, race_2level.factor = 'White',
                     route_of_infection_combined = 'Non-IDU',
                     prior_hcv_t0.factor = 'No',hcp_ind.factor = 'No',
                     year_of_t0 = c(2000, 2003),month = c(10,16),
                     age_at_t0 = c(35,50),new_cum_hcp = c(0,1),baseline_cd4_count_b = c(500,600),
                     log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade_only.factor = 'No')


order.for.summ <- c(which(rownames(mh_nade_only.summ) == 'age_at_t0'),
                    which(rownames(mh_nade_only.summ) == 'month'),
                    which(rownames(mh_nade_only.summ) == 'new_cum_hcp'),
                    which(rownames(mh_nade_only.summ) == 'year_of_t0'),
                    which(rownames(mh_nade_only.summ) == 'baseline_cd4_count_b'),
                    which(rownames(mh_nade_only.summ) == 'log10_baseline_vl'),
                    which(rownames(mh_nade_only.summ) == 'prior_hcp_t0'),
                    which(rownames(mh_nade_only.summ) == 'prior_mh_non_ade_only.factor - Yes:No'),
                    which(rownames(mh_nade_only.summ) == 'race_2level.factor - Non-white:White'),
                    which(rownames(mh_nade_only.summ) == 'route_of_infection_combined.factor - IDU:Non-IDU'),
                    which(rownames(mh_nade_only.summ) == 'hcp_ind.factor - Yes:No'),
                    which(rownames(mh_nade_only.summ) == 'prior_hcv_t0.factor - Yes:No'))
order.for.summ <- order.for.summ + 1
mh_nade_only_or <- format(round(mh_nade_only.summ[order.for.summ,'Effect'],2),nsmall=2)
mh_nade_only_or2 <- format(round(mh_nade_only.summ2[2,'Effect'],2),nsmall=2)
mh_nade_only_or3 <- format(round(mh_nade_only.summ3[2,'Effect'],2),nsmall=2)

mh_nade_only_ci <- paste('(',format(round(mh_nade_only.summ[order.for.summ,'Lower 0.95'],2),nsmall=2),
                 ', ',format(round(mh_nade_only.summ[order.for.summ,'Upper 0.95'],2),nsmall=2),')',sep='')

mh_nade_only_ci2 <- paste('(', format(round(mh_nade_only.summ2[2,'Lower 0.95'],2),nsmall=2),
                 ', ',format(round(mh_nade_only.summ2[2,'Upper 0.95'],2),nsmall=2),
                 ')',sep='')
mh_nade_only_ci3 <- paste('(', format(round(mh_nade_only.summ3[2,'Lower 0.95'],2),nsmall=2),
                 ', ',format(round(mh_nade_only.summ3[2,'Upper 0.95'],2),nsmall=2),
                 ')',sep='')

rownames.anova <- rownames(anova(mh_nade_only.mod[['msm2']]))

order.for.anova <- c(which(rownames.anova == 'age_at_t0'),
                     which(rownames.anova == 'month'),
                     which(rownames.anova == 'new_cum_hcp'),
                     which(rownames.anova == 'year_of_t0'),
                     which(rownames.anova == 'baseline_cd4_count_b'),
                     which(rownames.anova == 'log10_baseline_vl'),
                     which(rownames.anova == 'prior_hcp_t0'),
                     which(rownames.anova == 'prior_mh_non_ade_only.factor'),
                     which(rownames.anova == 'race_2level.factor'),
                     which(rownames.anova == 'route_of_infection_combined.factor'),
                     which(rownames.anova == 'hcp_ind.factor'),
                     which(rownames.anova == 'prior_hcv_t0.factor'))
mh_nade_only_p <- c(anova(mh_nade_only.mod[['msm2']])[order.for.anova,'P'])

mh_nade_only_p <- ifelse(mh_nade_only_p < 0.001, '< 0.001', ifelse(mh_nade_only_p >= 0.001 & mh_nade_only_p < 0.01,
                                                                   format(round(mh_nade_only_p,3),nsmall=2),
                                                   ifelse(mh_nade_only_p >= 0.01, format(round(mh_nade_only_p,2),nsmall=2),'')))
mh_nade_only_p <- ifelse(is.na(mh_nade_only_p),'', mh_nade_only_p)

#---------------------------------------------#


nade_dth <- by_outcomes(max_month = 'max_month_for_nade_dth_outcome', out = 'nade_dth_outcome_indicator',
                        nade_var = 'prior_non_ade.factor', time.df)
nade_dth.df <- subset(time.df, month <= max_month_for_nade_dth_outcome)

dd <- datadist(nade_dth.df)
options(datadist = 'dd')
j <- nade_dth[['msm2']]

nade_dth.summ <- summary(j, race_2level.factor = 'White',
                     route_of_infection_combined = 'Non-IDU',
                     prior_hcv_t0.factor = 'No',hcp_ind.factor = 'No',
                     year_of_t0 = c(2000, 2003),month=c(10,16),
                     age_at_t0 = c(35,30),new_cum_hcp = c(0,1),baseline_cd4_count_b = c(500,600),
                     log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_non_ade.factor = 'No')
nade_dth.summ2 <- summary(j, race_2level.factor = 'White',
                     route_of_infection_combined = 'Non-IDU',
                     prior_hcv_t0.factor = 'No',hcp_ind.factor = 'No',
                     year_of_t0 = c(2000, 2003),month=c(10,16),
                     age_at_t0 = c(35,40),new_cum_hcp = c(0,1),baseline_cd4_count_b = c(500,600),
                     log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_non_ade.factor = 'No')
nade_dth.summ3 <- summary(j, race_2level.factor = 'White',
                     route_of_infection_combined = 'Non-IDU',
                     prior_hcv_t0.factor = 'No',hcp_ind.factor = 'No',
                     year_of_t0 = c(2000, 2003),month=c(10,16),
                     age_at_t0 = c(35,50),new_cum_hcp = c(0,1),baseline_cd4_count_b = c(500,600),
                     log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_non_ade.factor = 'No')


order.for.summ <- c(which(rownames(nade_dth.summ) == 'age_at_t0'),
                    which(rownames(nade_dth.summ) == 'month'),
                    which(rownames(nade_dth.summ) == 'new_cum_hcp'),
                    which(rownames(nade_dth.summ) == 'year_of_t0'),
                    which(rownames(nade_dth.summ) == 'baseline_cd4_count_b'),
                    which(rownames(nade_dth.summ) == 'log10_baseline_vl'),
                    which(rownames(nade_dth.summ) == 'prior_hcp_t0'),
                    which(rownames(nade_dth.summ) == 'prior_non_ade.factor - Yes:No'),
                    which(rownames(nade_dth.summ) == 'race_2level.factor - Non-white:White'),
                    which(rownames(nade_dth.summ) == 'route_of_infection_combined.factor - IDU:Non-IDU'),
                    which(rownames(nade_dth.summ) == 'hcp_ind.factor - Yes:No'),
                    which(rownames(nade_dth.summ) == 'prior_hcv_t0.factor - Yes:No'))
order.for.summ <- order.for.summ + 1
nade_dth_or <- format(round(nade_dth.summ[order.for.summ,'Effect'],2),nsmall=2)
nade_dth_or2 <- format(round(nade_dth.summ2[2,'Effect'],2),nsmall=2)
nade_dth_or3 <- format(round(nade_dth.summ3[2,'Effect'],2),nsmall=2)

nade_dth_ci <- paste('(',format(round(nade_dth.summ[order.for.summ,'Lower 0.95'],2),nsmall=2),
                 ', ',format(round(nade_dth.summ[order.for.summ,'Upper 0.95'],2),nsmall=2),')',sep='')

nade_dth_ci2 <- paste('(', format(round(nade_dth.summ2[2,'Lower 0.95'],2),nsmall=2),
                 ', ',format(round(nade_dth.summ2[2,'Upper 0.95'],2),nsmall=2),
                 ')',sep='')
nade_dth_ci3 <- paste('(', format(round(nade_dth.summ3[2,'Lower 0.95'],2),nsmall=2),
                 ', ',format(round(nade_dth.summ3[2,'Upper 0.95'],2),nsmall=2),
                 ')',sep='')

rownames.anova <- rownames(anova(nade_dth[['msm2']]))

order.for.anova <- c(which(rownames.anova == 'age_at_t0'),
                     which(rownames.anova == 'month'),
                     which(rownames.anova == 'new_cum_hcp'),
                     which(rownames.anova == 'year_of_t0'),
                     which(rownames.anova == 'baseline_cd4_count_b'),
                     which(rownames.anova == 'log10_baseline_vl'),
                     which(rownames.anova == 'prior_hcp_t0'),
                     which(rownames.anova == 'prior_non_ade.factor'),
                     which(rownames.anova == 'race_2level.factor'),
                     which(rownames.anova == 'route_of_infection_combined.factor'),
                     which(rownames.anova == 'hcp_ind.factor'),
                     which(rownames.anova == 'prior_hcv_t0.factor'))
nade_dth_p <- c(anova(nade_dth[['msm2']])[order.for.anova,'P'])

nade_dth_p <- ifelse(nade_dth_p < 0.001, '< 0.001', ifelse(nade_dth_p >= 0.001 & nade_dth_p < 0.01, format(round(nade_dth_p,3),nsmall=2),
                                                   ifelse(nade_dth_p >= 0.01, format(round(nade_dth_p,2),nsmall=2),'')))
nade_dth_p <- ifelse(is.na(nade_dth_p),'', nade_dth_p)


mh_nade_dth <- by_outcomes(max_month = 'max_month_for_mh_nade_dth_outcome',
                           out = 'mh_nade_dth_outcome_indicator',
                           nade_var = 'prior_mh_non_ade.factor', time.df)
mh_nade_dth.df <- subset(time.df, month <= max_month_for_mh_nade_dth_outcome)
dd <- datadist(mh_nade_dth.df)
options(datadist = 'dd')
j <- mh_nade_dth[['msm2']]

mh_nade_dth.summ <- summary(j, race_2level.factor = 'White',
                     route_of_infection_combined = 'Non-IDU',
                     prior_hcv_t0.factor = 'No',hcp_ind.factor = 'No',
                     year_of_t0 = c(2000, 2003),month=c(10,16),
                     age_at_t0 = c(35,30),new_cum_hcp = c(0,1),baseline_cd4_count_b = c(500,600),
                     log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade.factor = 'No')
mh_nade_dth.summ2 <- summary(j, race_2level.factor = 'White',
                     route_of_infection_combined = 'Non-IDU',
                     prior_hcv_t0.factor = 'No',hcp_ind.factor = 'No',
                     year_of_t0 = c(2000, 2003),month=c(10,16),
                     age_at_t0 = c(35,40),new_cum_hcp = c(0,1),baseline_cd4_count_b = c(500,600),
                     log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade.factor = 'No')
mh_nade_dth.summ3 <- summary(j, race_2level.factor = 'White',
                     route_of_infection_combined = 'Non-IDU',
                     prior_hcv_t0.factor = 'No',hcp_ind.factor = 'No',
                     year_of_t0 = c(2000, 2003),month=c(10,16),
                     age_at_t0 = c(35,50),new_cum_hcp = c(0,1),baseline_cd4_count_b = c(500,600),
                     log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade.factor = 'No')


order.for.summ <- c(which(rownames(mh_nade_dth.summ) == 'age_at_t0'),
                    which(rownames(mh_nade_dth.summ) == 'month'),
                    which(rownames(mh_nade_dth.summ) == 'new_cum_hcp'),
                    which(rownames(mh_nade_dth.summ) == 'year_of_t0'),
                    which(rownames(mh_nade_dth.summ) == 'baseline_cd4_count_b'),
                    which(rownames(mh_nade_dth.summ) == 'log10_baseline_vl'),
                    which(rownames(mh_nade_dth.summ) == 'prior_hcp_t0'),
                    which(rownames(mh_nade_dth.summ) == 'prior_mh_non_ade.factor - Yes:No'),
                    which(rownames(mh_nade_dth.summ) == 'race_2level.factor - Non-white:White'),
                    which(rownames(mh_nade_dth.summ) == 'route_of_infection_combined.factor - IDU:Non-IDU'),
                    which(rownames(mh_nade_dth.summ) == 'hcp_ind.factor - Yes:No'),
                    which(rownames(mh_nade_dth.summ) == 'prior_hcv_t0.factor - Yes:No'))
order.for.summ <- order.for.summ + 1
mh_nade_dth_or <- format(round(mh_nade_dth.summ[order.for.summ,'Effect'],2),nsmall=2)
mh_nade_dth_or2 <- format(round(mh_nade_dth.summ2[2,'Effect'],2),nsmall=2)
mh_nade_dth_or3 <- format(round(mh_nade_dth.summ3[2,'Effect'],2),nsmall=2)

mh_nade_dth_ci <- paste('(',format(round(mh_nade_dth.summ[order.for.summ,'Lower 0.95'],2),nsmall=2),
                 ', ',format(round(mh_nade_dth.summ[order.for.summ,'Upper 0.95'],2),nsmall=2),')',sep='')
mh_nade_dth_ci2 <- paste('(', format(round(mh_nade_dth.summ2[2,'Lower 0.95'],2),nsmall=2),
                 ', ',format(round(mh_nade_dth.summ2[2,'Upper 0.95'],2),nsmall=2),
                 ')',sep='')
mh_nade_dth_ci3 <- paste('(', format(round(mh_nade_dth.summ3[2,'Lower 0.95'],2),nsmall=2),
                 ', ',format(round(mh_nade_dth.summ3[2,'Upper 0.95'],2),nsmall=2),
                 ')',sep='')

rownames.anova <- rownames(anova(mh_nade_dth[['msm2']]))


order.for.anova <- c(which(rownames.anova == 'age_at_t0'),
                     which(rownames.anova == 'month'),
                     which(rownames.anova == 'new_cum_hcp'),
                     which(rownames.anova == 'year_of_t0'),
                     which(rownames.anova == 'baseline_cd4_count_b'),
                     which(rownames.anova == 'log10_baseline_vl'),
                     which(rownames.anova == 'prior_hcp_t0'),
                     which(rownames.anova == 'prior_mh_non_ade.factor'),
                     which(rownames.anova == 'race_2level.factor'),
                     which(rownames.anova == 'route_of_infection_combined.factor'),
                     which(rownames.anova == 'hcp_ind.factor'),
                     which(rownames.anova == 'prior_hcv_t0.factor'))
mh_nade_dth_p <- c(anova(mh_nade_dth[['msm2']])[order.for.anova,'P'])

mh_nade_dth_p <- ifelse(mh_nade_dth_p < 0.001, '< 0.001', ifelse(mh_nade_dth_p >= 0.001 & mh_nade_dth_p < 0.01,
                                                                 format(round(mh_nade_dth_p,3),nsmall=2),
                                                   ifelse(mh_nade_dth_p >= 0.01,
                                                          format(round(mh_nade_dth_p,2),nsmall=2),'')))
mh_nade_dth_p <- ifelse(is.na(mh_nade_dth_p),'', mh_nade_dth_p)

#----------------------------------------------------#
mh_nade_only_dth <- by_outcomes(max_month = 'max_month_for_mh_nade_only_dth_outcome',
                           out = 'mh_nade_only_dth_outcome_indicator',
                           nade_var = 'prior_mh_non_ade_only.factor', time.df)
mh_nade_only_dth.df <- subset(time.df, month <= max_month_for_mh_nade_only_dth_outcome)
dd <- datadist(mh_nade_only_dth.df)
options(datadist = 'dd')
j <- mh_nade_only_dth[['msm2']]


mh_nade_only_dth.summ <- summary(j, race_2level.factor = 'White',
                     route_of_infection_combined = 'Non-IDU',
                     prior_hcv_t0.factor = 'No',hcp_ind.factor = 'No',
                     year_of_t0 = c(2000, 2003),month=c(10,16),
                     age_at_t0 = c(35,30),new_cum_hcp = c(0,1),baseline_cd4_count_b = c(500,600),
                     log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade_only.factor = 'No')
mh_nade_only_dth.summ2 <- summary(j, race_2level.factor = 'White',
                     route_of_infection_combined = 'Non-IDU',
                     prior_hcv_t0.factor = 'No',hcp_ind.factor = 'No',
                     year_of_t0 = c(2000, 2003),month=c(10,16),
                     age_at_t0 = c(35,40),new_cum_hcp = c(0,1),baseline_cd4_count_b = c(500,600),
                     log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade_only.factor = 'No')
mh_nade_only_dth.summ3 <- summary(j, race_2level.factor = 'White',
                     route_of_infection_combined = 'Non-IDU',
                     prior_hcv_t0.factor = 'No',hcp_ind.factor = 'No',
                     year_of_t0 = c(2000, 2003),month=c(10,16),
                     age_at_t0 = c(35,50),new_cum_hcp = c(0,1),baseline_cd4_count_b = c(500,600),
                     log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade_only.factor = 'No')


order.for.summ <- c(which(rownames(mh_nade_only_dth.summ) == 'age_at_t0'),
                    which(rownames(mh_nade_only_dth.summ) == 'month'),
                    which(rownames(mh_nade_only_dth.summ) == 'new_cum_hcp'),
                    which(rownames(mh_nade_only_dth.summ) == 'year_of_t0'),
                    which(rownames(mh_nade_only_dth.summ) == 'baseline_cd4_count_b'),
                    which(rownames(mh_nade_only_dth.summ) == 'log10_baseline_vl'),
                    which(rownames(mh_nade_only_dth.summ) == 'prior_hcp_t0'),
                    which(rownames(mh_nade_only_dth.summ) == 'prior_mh_non_ade_only.factor - Yes:No'),
                    which(rownames(mh_nade_only_dth.summ) == 'race_2level.factor - Non-white:White'),
                    which(rownames(mh_nade_only_dth.summ) == 'route_of_infection_combined.factor - IDU:Non-IDU'),
                    which(rownames(mh_nade_only_dth.summ) == 'hcp_ind.factor - Yes:No'),
                    which(rownames(mh_nade_only_dth.summ) == 'prior_hcv_t0.factor - Yes:No'))
order.for.summ <- order.for.summ + 1
mh_nade_only_dth_or <- format(round(mh_nade_only_dth.summ[order.for.summ,'Effect'],2),nsmall=2)
mh_nade_only_dth_or2 <- format(round(mh_nade_only_dth.summ2[2,'Effect'],2),nsmall=2)
mh_nade_only_dth_or3 <- format(round(mh_nade_only_dth.summ3[2,'Effect'],2),nsmall=2)

mh_nade_only_dth_ci <- paste('(',format(round(mh_nade_only_dth.summ[order.for.summ,'Lower 0.95'],2),nsmall=2),
                 ', ',format(round(mh_nade_only_dth.summ[order.for.summ,'Upper 0.95'],2),nsmall=2),')',sep='')

mh_nade_only_dth_ci2 <- paste('(', format(round(mh_nade_only_dth.summ2[2,'Lower 0.95'],2),nsmall=2),
                 ', ',format(round(mh_nade_only_dth.summ2[2,'Upper 0.95'],2),nsmall=2),
                 ')',sep='')
mh_nade_only_dth_ci3 <- paste('(', format(round(mh_nade_only_dth.summ3[2,'Lower 0.95'],2),nsmall=2),
                 ', ',format(round(mh_nade_only_dth.summ3[2,'Upper 0.95'],2),nsmall=2),
                 ')',sep='')

rownames.anova <- rownames(anova(mh_nade_only_dth[['msm2']]))

order.for.anova <- c(which(rownames.anova == 'age_at_t0'),
                     which(rownames.anova == 'month'),
                     which(rownames.anova == 'new_cum_hcp'),
                     which(rownames.anova == 'year_of_t0'),
                     which(rownames.anova == 'baseline_cd4_count_b'),
                     which(rownames.anova == 'log10_baseline_vl'),
                     which(rownames.anova == 'prior_hcp_t0'),
                     which(rownames.anova == 'prior_mh_non_ade_only.factor'),
                     which(rownames.anova == 'race_2level.factor'),
                     which(rownames.anova == 'route_of_infection_combined.factor'),
                     which(rownames.anova == 'hcp_ind.factor'),
                     which(rownames.anova == 'prior_hcv_t0.factor'))
mh_nade_only_dth_p <- c(anova(mh_nade_only_dth[['msm2']])[order.for.anova,'P'])

mh_nade_only_dth_p <- ifelse(mh_nade_only_dth_p < 0.001, '< 0.001', ifelse(mh_nade_only_dth_p >= 0.001 & mh_nade_only_dth_p < 0.01,
                                                                 format(round(mh_nade_only_dth_p,3),nsmall=2),
                                                   ifelse(mh_nade_only_dth_p >= 0.01,
                                                          format(round(mh_nade_only_dth_p,2),nsmall=2),'')))
mh_nade_only_dth_p <- ifelse(is.na(mh_nade_only_dth_p),'', mh_nade_only_dth_p)


mh_nade_no_sa_dth <- by_outcomes(max_month = 'max_month_for_mh_nade_no_sa_dth_outcome',
                           out = 'mh_nade_no_sa_dth_outcome_indicator',
                           nade_var = 'prior_mh_non_ade_no_sa.factor', time.df)
mh_nade_no_sa_dth.df <- subset(time.df, month <= max_month_for_mh_nade_no_sa_dth_outcome)
dd <- datadist(mh_nade_no_sa_dth.df)
options(datadist = 'dd')
j <- mh_nade_no_sa_dth[['msm2']]

mh_nade_no_sa_dth.summ <- summary(j, race_2level.factor = 'White',
                     route_of_infection_combined = 'Non-IDU',
                     prior_hcv_t0.factor = 'No',hcp_ind.factor = 'No',
                     year_of_t0 = c(2000, 2003),month=c(10,16),
                     age_at_t0 = c(35,30),new_cum_hcp = c(0,1),baseline_cd4_count_b = c(500,600),
                     log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade_no_sa.factor = 'No')
mh_nade_no_sa_dth.summ2 <- summary(j, race_2level.factor = 'White',
                     route_of_infection_combined = 'Non-IDU',
                     prior_hcv_t0.factor = 'No',hcp_ind.factor = 'No',
                     year_of_t0 = c(2000, 2003),month=c(10,16),
                     age_at_t0 = c(35,40),new_cum_hcp = c(0,1),baseline_cd4_count_b = c(500,600),
                     log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade_no_sa.factor = 'No')
mh_nade_no_sa_dth.summ3 <- summary(j, race_2level.factor = 'White',
                     route_of_infection_combined = 'Non-IDU',
                     prior_hcv_t0.factor = 'No',hcp_ind.factor = 'No',
                     year_of_t0 = c(2000, 2003),month=c(10,16),
                     age_at_t0 = c(35,50),new_cum_hcp = c(0,1),baseline_cd4_count_b = c(500,600),
                     log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade_no_sa.factor = 'No')


order.for.summ <- c(which(rownames(mh_nade_no_sa_dth.summ) == 'age_at_t0'),
                    which(rownames(mh_nade_no_sa_dth.summ) == 'month'),
                    which(rownames(mh_nade_no_sa_dth.summ) == 'new_cum_hcp'),
                    which(rownames(mh_nade_no_sa_dth.summ) == 'year_of_t0'),
                    which(rownames(mh_nade_no_sa_dth.summ) == 'baseline_cd4_count_b'),
                    which(rownames(mh_nade_no_sa_dth.summ) == 'log10_baseline_vl'),
                    which(rownames(mh_nade_no_sa_dth.summ) == 'prior_hcp_t0'),
                    which(rownames(mh_nade_no_sa_dth.summ) == 'prior_mh_non_ade_no_sa.factor - Yes:No'),
                    which(rownames(mh_nade_no_sa_dth.summ) == 'race_2level.factor - Non-white:White'),
                    which(rownames(mh_nade_no_sa_dth.summ) == 'route_of_infection_combined.factor - IDU:Non-IDU'),
                    which(rownames(mh_nade_no_sa_dth.summ) == 'hcp_ind.factor - Yes:No'),
                    which(rownames(mh_nade_no_sa_dth.summ) == 'prior_hcv_t0.factor - Yes:No'))
order.for.summ <- order.for.summ + 1
mh_nade_no_sa_dth_or <- format(round(mh_nade_no_sa_dth.summ[order.for.summ,'Effect'],2),nsmall=2)
mh_nade_no_sa_dth_or2 <- format(round(mh_nade_no_sa_dth.summ2[2,'Effect'],2),nsmall=2)
mh_nade_no_sa_dth_or3 <- format(round(mh_nade_no_sa_dth.summ3[2,'Effect'],2),nsmall=2)

mh_nade_no_sa_dth_ci <- paste('(',format(round(mh_nade_no_sa_dth.summ[order.for.summ,'Lower 0.95'],2),nsmall=2),
                 ', ',format(round(mh_nade_no_sa_dth.summ[order.for.summ,'Upper 0.95'],2),nsmall=2),')',sep='')
mh_nade_no_sa_dth_ci2 <- paste('(', format(round(mh_nade_no_sa_dth.summ2[2,'Lower 0.95'],2),nsmall=2),
                 ', ',format(round(mh_nade_no_sa_dth.summ2[2,'Upper 0.95'],2),nsmall=2),
                 ')',sep='')
mh_nade_no_sa_dth_ci3 <- paste('(', format(round(mh_nade_no_sa_dth.summ3[2,'Lower 0.95'],2),nsmall=2),
                 ', ',format(round(mh_nade_no_sa_dth.summ3[2,'Upper 0.95'],2),nsmall=2),
                 ')',sep='')

rownames.anova <- rownames(anova(mh_nade_no_sa_dth[['msm2']]))

order.for.anova <- c(which(rownames.anova == 'age_at_t0'),
                     which(rownames.anova == 'month'),
                     which(rownames.anova == 'new_cum_hcp'),
                     which(rownames.anova == 'year_of_t0'),
                     which(rownames.anova == 'baseline_cd4_count_b'),
                     which(rownames.anova == 'log10_baseline_vl'),
                     which(rownames.anova == 'prior_hcp_t0'),
                     which(rownames.anova == 'prior_mh_non_ade_no_sa.factor'),
                     which(rownames.anova == 'race_2level.factor'),
                     which(rownames.anova == 'route_of_infection_combined.factor'),
                     which(rownames.anova == 'hcp_ind.factor'),
                     which(rownames.anova == 'prior_hcv_t0.factor'))
mh_nade_no_sa_dth_p <- c(anova(mh_nade_no_sa_dth[['msm2']])[order.for.anova,'P'])

mh_nade_no_sa_dth_p <- ifelse(mh_nade_no_sa_dth_p < 0.001, '< 0.001', ifelse(mh_nade_no_sa_dth_p >= 0.001 & mh_nade_no_sa_dth_p < 0.01,
                                                                 format(round(mh_nade_no_sa_dth_p,3),nsmall=2),
                                                   ifelse(mh_nade_no_sa_dth_p >= 0.01,
                                                          format(round(mh_nade_no_sa_dth_p,2),nsmall=2),'')))
mh_nade_no_sa_dth_p <- ifelse(is.na(mh_nade_no_sa_dth_p),'', mh_nade_no_sa_dth_p)

death <- by_outcomes(max_month = 'max_month_for_death_outcome', out = 'death_outcome_indicator',
                     nade_var = 'prior_non_ade.factor', time.df)
death.df <- subset(time.df, month <= max_month_for_death_outcome)
dd <- datadist(death.df)
options(datadist = 'dd')
dd <- datadist(death.df)
options(datadist='dd')
j <- death[['msm2']]

death.summ <- summary(j, race_2level.factor = 'White',
                     route_of_infection_combined = 'Non-IDU',
                     prior_hcv_t0.factor = 'No',hcp_ind.factor = 'No',
                     year_of_t0 = c(2000, 2003),month=c(10,16),
                     age_at_t0 = c(50,55),new_cum_hcp = c(0,1),baseline_cd4_count_b = c(500,600),
                     log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_non_ade.factor = 'No')
death_or <- format(round(death.summ[which(rownames(death.summ) == ' Odds Ratio'),'Effect'],2),nsmall=2)
death_ci <- paste('(', format(round(death.summ[which(rownames(death.summ) == ' Odds Ratio'),'Lower 0.95'],2),nsmall=2),
                 ', ',format(round(death.summ[which(rownames(death.summ) == ' Odds Ratio'),'Upper 0.95'],2),nsmall=2),
                 ')',sep='')
rownames.anova <- rownames(anova(death[['msm2']]))

order.for.anova <- c(which(rownames.anova == 'age_at_t0'),
                     which(rownames.anova == 'month'),
                     which(rownames.anova == 'new_cum_hcp'),
                     which(rownames.anova == 'year_of_t0'),
                     which(rownames.anova == 'baseline_cd4_count_b'),
                     which(rownames.anova == 'log10_baseline_vl'),
                     which(rownames.anova == 'prior_hcp_t0'),
                     which(rownames.anova == 'prior_non_ade.factor'),
                     which(rownames.anova == 'race_2level.factor'),
                     which(rownames.anova == 'route_of_infection_combined.factor'),
                     which(rownames.anova == 'hcp_ind.factor'),
                     which(rownames.anova == 'prior_hcv_t0.factor'))
death_p <- c(anova(death[['msm2']])[order.for.anova[1:7],'P'],
            NA,
            anova(death[['msm2']])[order.for.anova[8],'P'],
            NA,
            anova(death[['msm2']])[order.for.anova[9:10],'P'])
death_p <- ifelse(death_p < 0.001, '< 0.001', ifelse(death_p >= 0.001 & death_p < 0.01, round(death_p,3),
                                                   ifelse(death_p >= 0.01, round(death_p,2),'')))
death_p <- ifelse(is.na(death_p),'', death_p)

# Just need a simple model with death as the outcome
df <- subset(time.df, month <= max_month_for_death_outcome)
df <- time.df[which(time.df$month <= time.df[['max_month_for_death_outcome']]),]
wt.mod.denom <- glm(hcp_ind.factor ~ age_at_t0 + rcs(month,4) + race_2level.factor + rcs(cd4_count_b,3) +
                                       (1-undetectable_vl)*log10_hiv1_rna_b + haart_ind + reg_indicator + route_of_infection_combined.factor +
                                       prior_hcv_t0.factor + year_of_t0 + ade_indicator + prior_hcp_t0 + prior_non_ade.factor,
                       data=df, subset=pregnancy_indicator == 0, family='binomial')
denom <- exp(predict(wt.mod.denom))/(1+exp(predict(wt.mod.denom)))

#plus time-invariant covariates from above
wt.mod.num <- glm(hcp_ind.factor ~ rcs(month,4), data=df,
                  subset=pregnancy_indicator == 0 & !is.na(cd4_count_b) & !is.na(log10_hiv1_rna_b),family='binomial')
num <- exp(predict(wt.mod.num))/(1+exp(predict(wt.mod.num)))

wt <- ifelse(df$hcp_ind[which(df$pregnancy_indicator == 0 & !is.na(df$cd4_count_b) & !is.na(df$log10_hiv1_rna_b))] == 1,
             num/denom, (1-num)/(1-denom))

quant.wt <- quantile(wt, probs=c(0.025,0.975))
wt <- ifelse(wt < quant.wt[1],quant.wt[1],
             ifelse(wt > quant.wt[2],quant.wt[2],wt))
             
outcome <- df$death_outcome_indicator[which(df$pregnancy_indicator == 0 &
                                            !is.na(df$cd4_count_b) &
                                            !is.na(df$log10_hiv1_rna_b))]
hcp_ind.factor <- df$hcp_ind.factor[which(df$pregnancy_indicator == 0 &
                                                !is.na(df$cd4_count_b) &
                                                !is.na(df$log10_hiv1_rna_b))]
new_cum_hcp <- df$new_cum_hcp[which(df$pregnancy_indicator == 0 &
                                    !is.na(df$cd4_count_b) &
                                    !is.na(df$log10_hiv1_rna_b))]

pregnancy_indicator <- df$pregnancy_indicator[which(df$pregnancy_indicator == 0 &
                                                    !is.na(df$cd4_count_b) & !is.na(df$log10_hiv1_rna_b))]

msm <- glm(outcome ~ hcp_ind.factor + new_cum_hcp, weights=wt,
                        family='quasibinomial',subset=pregnancy_indicator == 0)

outcome2 <- factor(outcome)
dd <<- datadist(df)
options(datadist='dd')

msm2 <- lrm(outcome2 ~ hcp_ind.factor + new_cum_hcp,weights=wt,
               subset=pregnancy_indicator == 0,tol=-10)


death.simple.summ <- summary(msm2, hcp_ind.factor = 'No',new_cum_hcp = c(0,1))

death_simple_or <- format(round(death.simple.summ[which(rownames(death.simple.summ) == ' Odds Ratio'),'Effect'],2),nsmall=2)
death_simple_ci <- paste('(', format(round(death.simple.summ[which(rownames(death.simple.summ) == ' Odds Ratio'),
                                           'Lower 0.95'],2),nsmall=2),
                 ', ',format(round(death.simple.summ[which(rownames(death.simple.summ) == ' Odds Ratio'),
                                   'Upper 0.95'],2),nsmall=2),')',sep='')
rownames.anova <- rownames(anova(msm2))
order.for.anova <- c(which(rownames.anova == 'new_cum_hcp'),
                     which(rownames.anova == 'hcp_ind.factor'))

death_simple_p <- c(anova(msm2)[order.for.anova[1:2],'P'])
death_simple_p <- ifelse(death_simple_p < 0.001, '< 0.001', ifelse(death_simple_p >= 0.001 & death_simple_p < 0.01,
                                                                   round(death_simple_p,3),
                                                   ifelse(death_simple_p >= 0.01, round(death_simple_p,2),'')))
death_simple_p <- ifelse(is.na(death_simple_p),'', death_simple_p)


by_outcomes_with_depo <- function(max_month, out, nade_var, time.df){
   df <- subset(time.df, month <= max_month)
   df <- time.df[which(time.df$month <= time.df[[max_month]]),]
   fmla.denom <- as.formula(paste('depo_ind.factor ~ age_at_t0 + rcs(month,4) + race_2level.factor + rcs(cd4_count_b,3) +
                                       (1-undetectable_vl)*log10_hiv1_rna_b + haart_ind + reg_indicator + route_of_infection_combined.factor +
                                       prior_hcv_t0.factor + year_of_t0 + ade_indicator + prior_hcp_t0 + ',nade_var,sep=''))
   wt.mod.denom <- glm(fmla.denom,data=df, subset=pregnancy_indicator == 0, family='binomial')
   denom <- exp(predict(wt.mod.denom))/(1+exp(predict(wt.mod.denom)))

   #plus time-invariant covariates from above
   fmla.num <- as.formula(paste('depo_ind.factor ~ rcs(month,4) + age_at_t0 + race_2level.factor + route_of_infection_combined.factor +
                                            prior_hcv_t0.factor + year_of_t0 + prior_hcp_t0 + ',nade_var,sep=''))
   wt.mod.num <- glm(fmla.num,data=df, subset=pregnancy_indicator == 0 & !is.na(cd4_count_b) & !is.na(log10_hiv1_rna_b),family='binomial')
   num <- exp(predict(wt.mod.num))/(1+exp(predict(wt.mod.num)))

   wt <- ifelse(df$hcp_ind[which(df$pregnancy_indicator == 0 & !is.na(df$cd4_count_b) & !is.na(df$log10_hiv1_rna_b))] == 1,
                num/denom, (1-num)/(1-denom))

   quant.wt <- quantile(wt, probs=c(0.025,0.975))
   wt <- ifelse(wt < quant.wt[1], quant.wt[1],
                ifelse(wt > quant.wt[2],quant.wt[2],wt))

   outcome <- eval(parse(text=paste('df$',out,sep='')))[which(df$pregnancy_indicator == 0 &
                                                              !is.na(df$cd4_count_b) &
                                                              !is.na(df$log10_hiv1_rna_b))]
   df$outcome <- NA
   df$outcome[which(df$pregnancy_indicator == 0)] <- outcome
   df$wt <- NA
   df$wt[which(df$pregnancy_indicator == 0)] <- wt
   fmla.main <- as.formula(paste('outcome ~ rcs(age_at_t0,3) + month + race_2level.factor + route_of_infection_combined.factor + depo_ind.factor +
                        cum_depo + prior_hcv_t0.factor + year_of_t0 + baseline_cd4_count_b +
                        log10_baseline_vl + prior_hcp_t0 + ',nade_var,sep=''))
   msm <- glm(fmla.main, data=df, weights=wt,
                        family='quasibinomial',subset=pregnancy_indicator == 0)
   outcome2 <- eval(parse(text=paste('df$',out,sep='')))[which(df$pregnancy_indicator == 0 &
                                                              !is.na(df$cd4_count_b) &
                                                              !is.na(df$log10_hiv1_rna_b))]
   df$outcome2 <- NA
   df$outcome2[which(df$pregnancy_indicator == 0 & !is.na(df$cd4_count_b) & !is.na(df$log10_hiv1_rna_b))] <- outcome2
   df$outcome2 <- factor(df$outcome2)

   dd <<- datadist(df)
   options(datadist='dd')


   fmla.main2 <- as.formula(paste('outcome2 ~ rcs(age_at_t0,3) + month + race_2level.factor + route_of_infection_combined.factor +
                                                    depo_ind.factor +
                                                   cum_depo + prior_hcv_t0.factor +
                                                   year_of_t0 +
                                                   + baseline_cd4_count_b +
                                                   log10_baseline_vl + prior_hcp_t0 + ',nade_var,sep=''))
   msm2 <- lrm(fmla.main2, data=df, weights=wt, subset=pregnancy_indicator == 0,tol=-10)

   fmla.main3 <- as.formula(paste('outcome2 ~ rcs(age_at_t0,3) + month + race_2level.factor +
                                                    depo_ind.factor +
                                                   cum_depo +
                                                   prior_hcv_t0.factor +
                                                   route_of_infection_combined.factor +
                                                   + baseline_cd4_count_b +
                                                   log10_baseline_vl + prior_hcp_t0 + ',nade_var,sep=''))
   msm2b <- lrm(fmla.main3, data=df, weights=wt, subset=pregnancy_indicator == 0,tol=-10)


   return(list(denom=denom,
               num=num,
               wt=wt,
               msm=msm,
               msm2=msm2,
               msm2b=msm2b))
}


time.df$depo_ind.factor <- factor(time.df$hcp_depo_ind, levels=c(0,1), labels = c('No','Yes'))

nade.depo <- by_outcomes_with_depo(max_month = 'max_month_for_nade_outcome', out = 'nade_outcome_indicator', nade_var = 'prior_non_ade.factor',time.df)

nade.depo.df <- subset(time.df, month <= max_month_for_nade_outcome)

dd <- datadist(nade.depo.df)
options(datadist='dd')
j <- nade.depo[['msm2']]

nade.depo.summ <- summary(j, race_2level.factor = 'White',
                     route_of_infection_combined.factor = 'Non-IDU',
                     prior_hcv_t0.factor = 'No',depo_ind.factor = 'No',
                     year_of_t0 = c(2000, 2003),month = c(10,16),
                     age_at_t0 = c(35,30),cum_depo = c(0,1),baseline_cd4_count_b = c(500,600),
                     log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_non_ade.factor = 'No')
nade.depo.summ2 <- summary(j, race_2level.factor = 'White',
                     route_of_infection_combined.factor = 'Non-IDU',
                     prior_hcv_t0.factor = 'No',depo_ind.factor = 'No',
                     year_of_t0 = c(2000, 2003),month = c(10,16),
                     age_at_t0 = c(35,40),cum_depo = c(0,1),baseline_cd4_count_b = c(500,600),
                     log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_non_ade.factor = 'No')
nade.depo.summ3 <- summary(j, race_2level.factor = 'White',
                     route_of_infection_combined.factor = 'Non-IDU',
                     prior_hcv_t0.factor = 'No',depo_ind.factor = 'No',
                     year_of_t0 = c(2000, 2003),month = c(10,16),
                     age_at_t0 = c(35,50),cum_depo = c(0,1),baseline_cd4_count_b = c(500,600),
                     log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_non_ade.factor = 'No')


order.for.summ <- c(which(rownames(nade.depo.summ) == 'age_at_t0'),
                    which(rownames(nade.depo.summ) == 'month'),
                    which(rownames(nade.depo.summ) == 'cum_depo'),
                    which(rownames(nade.depo.summ) == 'year_of_t0'),
                    which(rownames(nade.depo.summ) == 'baseline_cd4_count_b'),
                    which(rownames(nade.depo.summ) == 'log10_baseline_vl'),
                    which(rownames(nade.depo.summ) == 'prior_hcp_t0'),
                    which(rownames(nade.depo.summ) == 'prior_non_ade.factor - Yes:No'),
                    which(rownames(nade.depo.summ) == 'race_2level.factor - Non-white:White'),
                    which(rownames(nade.depo.summ) == 'route_of_infection_combined.factor - IDU:Non-IDU'),
                    which(rownames(nade.depo.summ) == 'depo_ind.factor - Yes:No'),
                    which(rownames(nade.depo.summ) == 'prior_hcv_t0.factor - Yes:No'))
order.for.summ <- order.for.summ + 1
nade_depo_or <- format(round(nade.depo.summ[order.for.summ,'Effect'],2),nsmall=2)
nade_depo_or2 <- format(round(nade.depo.summ2[2,'Effect'],2),nsmall=2)
nade_depo_or3 <- format(round(nade.depo.summ3[2,'Effect'],2),nsmall=2)

nade_depo_ci <- paste('(',format(round(nade.depo.summ[order.for.summ,'Lower 0.95'],2),nsmall=2),
                 ', ',format(round(nade.depo.summ[order.for.summ,'Upper 0.95'],2),nsmall=2),')',sep='')

nade_depo_ci2 <- paste('(', format(round(nade.depo.summ2[2,'Lower 0.95'],2),nsmall=2),
                 ', ',format(round(nade.depo.summ2[2,'Upper 0.95'],2),nsmall=2),
                 ')',sep='')
nade_depo_ci3 <- paste('(', format(round(nade.depo.summ3[2,'Lower 0.95'],2),nsmall=2),
                 ', ',format(round(nade.depo.summ3[2,'Upper 0.95'],2),nsmall=2),
                 ')',sep='')

rownames.anova <- rownames(anova(nade.depo[['msm2']]))

order.for.anova <- c(which(rownames.anova == 'age_at_t0'),
                     which(rownames.anova == 'month'),
                     which(rownames.anova == 'cum_depo'),
                     which(rownames.anova == 'year_of_t0'),
                     which(rownames.anova == 'baseline_cd4_count_b'),
                     which(rownames.anova == 'log10_baseline_vl'),
                     which(rownames.anova == 'prior_hcp_t0'),
                     which(rownames.anova == 'prior_non_ade.factor'),
                     which(rownames.anova == 'race_2level.factor'),
                     which(rownames.anova == 'route_of_infection_combined.factor'),
                     which(rownames.anova == 'depo_ind.factor'),
                     which(rownames.anova == 'prior_hcv_t0.factor'))
nade_depo_p <- c(anova(nade.depo[['msm2']])[order.for.anova,'P'])

nade_depo_p <- ifelse(nade_depo_p < 0.001, '< 0.001', ifelse(nade_depo_p >= 0.001 & nade_depo_p < 0.01, format(round(nade_depo_p,3),nsmall=2),
                                                   ifelse(nade_depo_p >= 0.01, format(round(nade_depo_p,2),nsmall=2),'')))
nade_depo_p <- ifelse(is.na(nade_depo_p),'', nade_depo_p)


mh_nade.depo.mod <- by_outcomes_with_depo(max_month = 'max_month_for_mh_nade_outcome', out = 'mh_nade_outcome_indicator',
                       nade_var = 'prior_mh_non_ade.factor', time.df)
mh_nade.depo.df <- subset(time.df, month <= max_month_for_mh_nade_outcome)

dd <- datadist(mh_nade.depo.df)
options(datadist = 'dd')
dd <- datadist(mh_nade.depo.df)
options(datadist='dd')
j <- mh_nade.depo.mod[['msm2']]

mh_nade.depo.summ <- summary(j, race_2level.factor = 'White',
                     route_of_infection_combined = 'Non-IDU',
                     prior_hcv_t0.factor = 'No',depo_ind.factor = 'No',
                     year_of_t0 = c(2000, 2003),month = c(10,16),
                     age_at_t0 = c(35,30),cum_depo = c(0,1),baseline_cd4_count_b = c(500,600),
                     log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade.factor = 'No')
mh_nade.depo.summ2 <- summary(j, race_2level.factor = 'White',
                     route_of_infection_combined = 'Non-IDU',
                     prior_hcv_t0.factor = 'No',depo_ind.factor = 'No',
                     year_of_t0 = c(2000, 2003),month = c(10,16),
                     age_at_t0 = c(35,40),cum_depo = c(0,1),baseline_cd4_count_b = c(500,600),
                     log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade.factor = 'No')
mh_nade.depo.summ3 <- summary(j, race_2level.factor = 'White',
                     route_of_infection_combined = 'Non-IDU',
                     prior_hcv_t0.factor = 'No',depo_ind.factor = 'No',
                     year_of_t0 = c(2000, 2003),month = c(10,16),
                     age_at_t0 = c(35,50),cum_depo = c(0,1),baseline_cd4_count_b = c(500,600),
                     log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade.factor = 'No')


order.for.summ <- c(which(rownames(mh_nade.depo.summ) == 'age_at_t0'),
                    which(rownames(mh_nade.depo.summ) == 'month'),
                    which(rownames(mh_nade.depo.summ) == 'cum_depo'),
                    which(rownames(mh_nade.depo.summ) == 'year_of_t0'),
                    which(rownames(mh_nade.depo.summ) == 'baseline_cd4_count_b'),
                    which(rownames(mh_nade.depo.summ) == 'log10_baseline_vl'),
                    which(rownames(mh_nade.depo.summ) == 'prior_hcp_t0'),
                    which(rownames(mh_nade.depo.summ) == 'prior_mh_non_ade.factor - Yes:No'),
                    which(rownames(mh_nade.depo.summ) == 'race_2level.factor - Non-white:White'),
                    which(rownames(mh_nade.depo.summ) == 'route_of_infection_combined.factor - IDU:Non-IDU'),
                    which(rownames(mh_nade.depo.summ) == 'depo_ind.factor - Yes:No'),
                    which(rownames(mh_nade.depo.summ) == 'prior_hcv_t0.factor - Yes:No'))
order.for.summ <- order.for.summ + 1
mh_nade_depo_or <- format(round(mh_nade.depo.summ[order.for.summ,'Effect'],2),nsmall=2)
mh_nade_depo_or2 <- format(round(mh_nade.depo.summ2[2,'Effect'],2),nsmall=2)
mh_nade_depo_or3 <- format(round(mh_nade.depo.summ3[2,'Effect'],2),nsmall=2)

mh_nade_depo_ci <- paste('(',format(round(mh_nade.depo.summ[order.for.summ,'Lower 0.95'],2),nsmall=2),
                 ', ',format(round(mh_nade.depo.summ[order.for.summ,'Upper 0.95'],2),nsmall=2),')',sep='')
mh_nade_depo_ci2 <- paste('(', format(round(mh_nade.depo.summ2[2,'Lower 0.95'],2),nsmall=2),
                 ', ',format(round(mh_nade.depo.summ2[2,'Upper 0.95'],2),nsmall=2),
                 ')',sep='')
mh_nade_depo_ci3 <- paste('(', format(round(mh_nade.depo.summ3[2,'Lower 0.95'],2),nsmall=2),
                 ', ',format(round(mh_nade.depo.summ3[2,'Upper 0.95'],2),nsmall=2),
                 ')',sep='')

rownames.anova <- rownames(anova(mh_nade.depo.mod[['msm2']]))

order.for.anova <- c(which(rownames.anova == 'age_at_t0'),
                     which(rownames.anova == 'month'),
                     which(rownames.anova == 'cum_depo'),
                     which(rownames.anova == 'year_of_t0'),
                     which(rownames.anova == 'baseline_cd4_count_b'),
                     which(rownames.anova == 'log10_baseline_vl'),
                     which(rownames.anova == 'prior_hcp_t0'),
                     which(rownames.anova == 'prior_mh_non_ade.factor'),
                     which(rownames.anova == 'race_2level.factor'),
                     which(rownames.anova == 'route_of_infection_combined.factor'),
                     which(rownames.anova == 'depo_ind.factor'),
                     which(rownames.anova == 'prior_hcv_t0.factor'))
mh_nade_depo_p <- c(anova(mh_nade.depo.mod[['msm2']])[order.for.anova,'P'])

mh_nade_depo_p <- ifelse(mh_nade_depo_p < 0.001, '< 0.001', ifelse(mh_nade_depo_p >= 0.001 & mh_nade_depo_p < 0.01,
                                                                   format(round(mh_nade_depo_p,3),nsmall=2),
                                                   ifelse(mh_nade_depo_p >= 0.01, format(round(mh_nade_depo_p,2),nsmall=2),'')))
mh_nade_depo_p <- ifelse(is.na(mh_nade_depo_p),'', mh_nade_depo_p)


#----------------------------------------------#
mh_nade_only.depo.mod <- by_outcomes_with_depo(max_month = 'max_month_for_mh_nade_only_outcome', out = 'mh_nade_only_outcome_indicator',
                       nade_var = 'prior_mh_non_ade_only.factor', time.df)
mh_nade_only.depo.df <- subset(time.df, month <= max_month_for_mh_nade_only_outcome)

dd <- datadist(mh_nade_only.depo.df)
options(datadist = 'dd')
j <- mh_nade_only.depo.mod[['msm2']]

mh_nade_only.depo.summ <- summary(j, race_2level.factor = 'White',
                     route_of_infection_combined = 'Non-IDU',
                     prior_hcv_t0.factor = 'No',depo_ind.factor = 'No',
                     year_of_t0 = c(2000, 2003),month = c(10,16),
                     age_at_t0 = c(35,30),cum_depo = c(0,1),baseline_cd4_count_b = c(500,600),
                     log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade_only.factor = 'No')
mh_nade_only.depo.summ2 <- summary(j, race_2level.factor = 'White',
                     route_of_infection_combined = 'Non-IDU',
                     prior_hcv_t0.factor = 'No',depo_ind.factor = 'No',
                     year_of_t0 = c(2000, 2003),month = c(10,16),
                     age_at_t0 = c(35,40),cum_depo = c(0,1),baseline_cd4_count_b = c(500,600),
                     log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade_only.factor = 'No')
mh_nade_only.depo.summ3 <- summary(j, race_2level.factor = 'White',
                     route_of_infection_combined = 'Non-IDU',
                     prior_hcv_t0.factor = 'No',depo_ind.factor = 'No',
                     year_of_t0 = c(2000, 2003),month = c(10,16),
                     age_at_t0 = c(35,50),cum_depo = c(0,1),baseline_cd4_count_b = c(500,600),
                     log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade_only.factor = 'No')


order.for.summ <- c(which(rownames(mh_nade_only.depo.summ) == 'age_at_t0'),
                    which(rownames(mh_nade_only.depo.summ) == 'month'),
                    which(rownames(mh_nade_only.depo.summ) == 'cum_depo'),
                    which(rownames(mh_nade_only.depo.summ) == 'year_of_t0'),
                    which(rownames(mh_nade_only.depo.summ) == 'baseline_cd4_count_b'),
                    which(rownames(mh_nade_only.depo.summ) == 'log10_baseline_vl'),
                    which(rownames(mh_nade_only.depo.summ) == 'prior_hcp_t0'),
                    which(rownames(mh_nade_only.depo.summ) == 'prior_mh_non_ade_only.factor - Yes:No'),
                    which(rownames(mh_nade_only.depo.summ) == 'race_2level.factor - Non-white:White'),
                    which(rownames(mh_nade_only.depo.summ) == 'route_of_infection_combined.factor - IDU:Non-IDU'),
                    which(rownames(mh_nade_only.depo.summ) == 'depo_ind.factor - Yes:No'),
                    which(rownames(mh_nade_only.depo.summ) == 'prior_hcv_t0.factor - Yes:No'))
order.for.summ <- order.for.summ + 1
mh_nade_only_depo_or <- format(round(mh_nade_only.depo.summ[order.for.summ,'Effect'],2),nsmall=2)
mh_nade_only_depo_or2 <- format(round(mh_nade_only.depo.summ2[2,'Effect'],2),nsmall=2)
mh_nade_only_depo_or3 <- format(round(mh_nade_only.depo.summ3[2,'Effect'],2),nsmall=2)

mh_nade_only_depo_ci <- paste('(',format(round(mh_nade_only.depo.summ[order.for.summ,'Lower 0.95'],2),nsmall=2),
                 ', ',format(round(mh_nade_only.depo.summ[order.for.summ,'Upper 0.95'],2),nsmall=2),')',sep='')

mh_nade_only_depo_ci2 <- paste('(', format(round(mh_nade_only.depo.summ2[2,'Lower 0.95'],2),nsmall=2),
                 ', ',format(round(mh_nade_only.depo.summ2[2,'Upper 0.95'],2),nsmall=2),
                 ')',sep='')
mh_nade_only_depo_ci3 <- paste('(', format(round(mh_nade_only.depo.summ3[2,'Lower 0.95'],2),nsmall=2),
                 ', ',format(round(mh_nade_only.depo.summ3[2,'Upper 0.95'],2),nsmall=2),
                 ')',sep='')

rownames.anova <- rownames(anova(mh_nade_only.depo.mod[['msm2']]))

order.for.anova <- c(which(rownames.anova == 'age_at_t0'),
                     which(rownames.anova == 'month'),
                     which(rownames.anova == 'cum_depo'),
                     which(rownames.anova == 'year_of_t0'),
                     which(rownames.anova == 'baseline_cd4_count_b'),
                     which(rownames.anova == 'log10_baseline_vl'),
                     which(rownames.anova == 'prior_hcp_t0'),
                     which(rownames.anova == 'prior_mh_non_ade_only.factor'),
                     which(rownames.anova == 'race_2level.factor'),
                     which(rownames.anova == 'route_of_infection_combined.factor'),
                     which(rownames.anova == 'depo_ind.factor'),
                     which(rownames.anova == 'prior_hcv_t0.factor'))
mh_nade_only_depo_p <- c(anova(mh_nade_only.depo.mod[['msm2']])[order.for.anova,'P'])

mh_nade_only_depo_p <- ifelse(mh_nade_only_depo_p < 0.001, '< 0.001', ifelse(mh_nade_only_depo_p >= 0.001 & mh_nade_only_depo_p < 0.01,
                                                                   format(round(mh_nade_only_depo_p,3),nsmall=2),
                                                   ifelse(mh_nade_only_depo_p >= 0.01, format(round(mh_nade_only_depo_p,2),nsmall=2),'')))
mh_nade_only_depo_p <- ifelse(is.na(mh_nade_only_depo_p),'', mh_nade_only_depo_p)


mh_nade_no_sa.depo.mod <- by_outcomes_with_depo(max_month = 'max_month_for_mh_nade_no_sa_outcome', out = 'mh_nade_no_sa_outcome_indicator',
                       nade_var = 'prior_mh_non_ade_no_sa.factor', time.df)
mh_nade_no_sa.depo.df <- subset(time.df, month <= max_month_for_mh_nade_no_sa_outcome)

dd <- datadist(mh_nade_no_sa.depo.df)
options(datadist = 'dd')
j <- mh_nade_no_sa.depo.mod[['msm2']]

mh_nade_no_sa.depo.summ <- summary(j, race_2level.factor = 'White',
                     route_of_infection_combined = 'Non-IDU',
                     prior_hcv_t0.factor = 'No',depo_ind.factor = 'No',
                     year_of_t0 = c(2000, 2003),month = c(10,16),
                     age_at_t0 = c(35,30),cum_depo = c(0,1),baseline_cd4_count_b = c(500,600),
                     log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade_no_sa.factor = 'No')
mh_nade_no_sa.depo.summ2 <- summary(j, race_2level.factor = 'White',
                     route_of_infection_combined = 'Non-IDU',
                     prior_hcv_t0.factor = 'No',depo_ind.factor = 'No',
                     year_of_t0 = c(2000, 2003),month = c(10,16),
                     age_at_t0 = c(35,40),cum_depo = c(0,1),baseline_cd4_count_b = c(500,600),
                     log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade_no_sa.factor = 'No')
mh_nade_no_sa.depo.summ3 <- summary(j, race_2level.factor = 'White',
                     route_of_infection_combined = 'Non-IDU',
                     prior_hcv_t0.factor = 'No',depo_ind.factor = 'No',
                     year_of_t0 = c(2000, 2003),month = c(10,16),
                     age_at_t0 = c(35,50),cum_depo = c(0,1),baseline_cd4_count_b = c(500,600),
                     log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade_no_sa.factor = 'No')


order.for.summ <- c(which(rownames(mh_nade_no_sa.depo.summ) == 'age_at_t0'),
                    which(rownames(mh_nade_no_sa.depo.summ) == 'month'),
                    which(rownames(mh_nade_no_sa.depo.summ) == 'cum_depo'),
                    which(rownames(mh_nade_no_sa.depo.summ) == 'year_of_t0'),
                    which(rownames(mh_nade_no_sa.depo.summ) == 'baseline_cd4_count_b'),
                    which(rownames(mh_nade_no_sa.depo.summ) == 'log10_baseline_vl'),
                    which(rownames(mh_nade_no_sa.depo.summ) == 'prior_hcp_t0'),
                    which(rownames(mh_nade_no_sa.depo.summ) == 'prior_mh_non_ade_no_sa.factor - Yes:No'),
                    which(rownames(mh_nade_no_sa.depo.summ) == 'race_2level.factor - Non-white:White'),
                    which(rownames(mh_nade_no_sa.depo.summ) == 'route_of_infection_combined.factor - IDU:Non-IDU'),
                    which(rownames(mh_nade_no_sa.depo.summ) == 'depo_ind.factor - Yes:No'),
                    which(rownames(mh_nade_no_sa.depo.summ) == 'prior_hcv_t0.factor - Yes:No'))
order.for.summ <- order.for.summ + 1
mh_nade_no_sa_depo_or <- format(round(mh_nade_no_sa.depo.summ[order.for.summ,'Effect'],2),nsmall=2)
mh_nade_no_sa_depo_or2 <- format(round(mh_nade_no_sa.depo.summ2[2,'Effect'],2),nsmall=2)
mh_nade_no_sa_depo_or3 <- format(round(mh_nade_no_sa.depo.summ3[2,'Effect'],2),nsmall=2)
mh_nade_no_sa_depo_ci <- paste('(',format(round(mh_nade_no_sa.depo.summ[order.for.summ,'Lower 0.95'],2),nsmall=2),
                 ', ',format(round(mh_nade_no_sa.depo.summ[order.for.summ,'Upper 0.95'],2),nsmall=2),')',sep='')
mh_nade_no_sa_depo_ci2 <- paste('(', format(round(mh_nade_no_sa.depo.summ2[2,'Lower 0.95'],2),nsmall=2),
                 ', ',format(round(mh_nade_no_sa.depo.summ2[2,'Upper 0.95'],2),nsmall=2),
                 ')',sep='')
mh_nade_no_sa_depo_ci3 <- paste('(', format(round(mh_nade_no_sa.depo.summ3[2,'Lower 0.95'],2),nsmall=2),
                 ', ',format(round(mh_nade_no_sa.depo.summ3[2,'Upper 0.95'],2),nsmall=2),
                 ')',sep='')

rownames.anova <- rownames(anova(mh_nade_no_sa.depo.mod[['msm2']]))

order.for.anova <- c(which(rownames.anova == 'age_at_t0'),
                     which(rownames.anova == 'month'),
                     which(rownames.anova == 'cum_depo'),
                     which(rownames.anova == 'year_of_t0'),
                     which(rownames.anova == 'baseline_cd4_count_b'),
                     which(rownames.anova == 'log10_baseline_vl'),
                     which(rownames.anova == 'prior_hcp_t0'),
                     which(rownames.anova == 'prior_mh_non_ade_no_sa.factor'),
                     which(rownames.anova == 'race_2level.factor'),
                     which(rownames.anova == 'route_of_infection_combined.factor'),
                     which(rownames.anova == 'depo_ind.factor'),
                     which(rownames.anova == 'prior_hcv_t0.factor'))
mh_nade_no_sa_depo_p <- c(anova(mh_nade_no_sa.depo.mod[['msm2']])[order.for.anova,'P'])

mh_nade_no_sa_depo_p <- ifelse(mh_nade_no_sa_depo_p < 0.001, '< 0.001', ifelse(mh_nade_no_sa_depo_p >= 0.001 & mh_nade_no_sa_depo_p < 0.01,
                                                                   format(round(mh_nade_no_sa_depo_p,3),nsmall=2),
                                                   ifelse(mh_nade_no_sa_depo_p >= 0.01, format(round(mh_nade_no_sa_depo_p,2),nsmall=2),'')))
mh_nade_no_sa_depo_p <- ifelse(is.na(mh_nade_no_sa_depo_p),'', mh_nade_no_sa_depo_p)

#----------------------------------------------#

nade_dth_depo <- by_outcomes_with_depo(max_month = 'max_month_for_nade_dth_outcome', out = 'nade_dth_outcome_indicator',
                        nade_var = 'prior_non_ade.factor', time.df)
nade_dth.depo.df <- subset(time.df, month <= max_month_for_nade_dth_outcome)

dd <- datadist(nade_dth.depo.df)
options(datadist = 'dd')
j <- nade_dth_depo[['msm2']]

nade_dth.depo.summ <- summary(j, race_2level.factor = 'White',
                     route_of_infection_combined = 'Non-IDU',
                     prior_hcv_t0.factor = 'No',depo_ind.factor = 'No',
                     year_of_t0 = c(2000, 2003),month=c(10,16),
                     age_at_t0 = c(35,30),cum_depo = c(0,1),baseline_cd4_count_b = c(500,600),
                     log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_non_ade.factor = 'No')
nade_dth.depo.summ2 <- summary(j, race_2level.factor = 'White',
                     route_of_infection_combined = 'Non-IDU',
                     prior_hcv_t0.factor = 'No',depo_ind.factor = 'No',
                     year_of_t0 = c(2000, 2003),month=c(10,16),
                     age_at_t0 = c(35,40),cum_depo = c(0,1),baseline_cd4_count_b = c(500,600),
                     log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_non_ade.factor = 'No')
nade_dth.depo.summ3 <- summary(j, race_2level.factor = 'White',
                     route_of_infection_combined = 'Non-IDU',
                     prior_hcv_t0.factor = 'No',depo_ind.factor = 'No',
                     year_of_t0 = c(2000, 2003),month=c(10,16),
                     age_at_t0 = c(35,50),cum_depo = c(0,1),baseline_cd4_count_b = c(500,600),
                     log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_non_ade.factor = 'No')


order.for.summ <- c(which(rownames(nade_dth.depo.summ) == 'age_at_t0'),
                    which(rownames(nade_dth.depo.summ) == 'month'),
                    which(rownames(nade_dth.depo.summ) == 'cum_depo'),
                    which(rownames(nade_dth.depo.summ) == 'year_of_t0'),
                    which(rownames(nade_dth.depo.summ) == 'baseline_cd4_count_b'),
                    which(rownames(nade_dth.depo.summ) == 'log10_baseline_vl'),
                    which(rownames(nade_dth.depo.summ) == 'prior_hcp_t0'),
                    which(rownames(nade_dth.depo.summ) == 'prior_non_ade.factor - Yes:No'),
                    which(rownames(nade_dth.depo.summ) == 'race_2level.factor - Non-white:White'),
                    which(rownames(nade_dth.depo.summ) == 'route_of_infection_combined.factor - IDU:Non-IDU'),
                    which(rownames(nade_dth.depo.summ) == 'depo_ind.factor - Yes:No'),
                    which(rownames(nade_dth.depo.summ) == 'prior_hcv_t0.factor - Yes:No'))
order.for.summ <- order.for.summ + 1
nade_dth_depo_or <- format(round(nade_dth.depo.summ[order.for.summ,'Effect'],2),nsmall=2)
# nade_dth_depo_or <- format(round(nade_dth.depo.summ[which(rownames(nade_dth.depo.summ) == ' Odds Ratio'),'Effect'],2),nsmall=2)
nade_dth_depo_or2 <- format(round(nade_dth.depo.summ2[2,'Effect'],2),nsmall=2)
nade_dth_depo_or3 <- format(round(nade_dth.depo.summ3[2,'Effect'],2),nsmall=2)

nade_dth_depo_ci <- paste('(',format(round(nade_dth.depo.summ[order.for.summ,'Lower 0.95'],2),nsmall=2),
                 ', ',format(round(nade_dth.depo.summ[order.for.summ,'Upper 0.95'],2),nsmall=2),')',sep='')

nade_dth_depo_ci2 <- paste('(', format(round(nade_dth.depo.summ2[2,'Lower 0.95'],2),nsmall=2),
                 ', ',format(round(nade_dth.depo.summ2[2,'Upper 0.95'],2),nsmall=2),
                 ')',sep='')
nade_dth_depo_ci3 <- paste('(', format(round(nade_dth.depo.summ3[2,'Lower 0.95'],2),nsmall=2),
                 ', ',format(round(nade_dth.depo.summ3[2,'Upper 0.95'],2),nsmall=2),
                 ')',sep='')

rownames.anova <- rownames(anova(nade_dth_depo[['msm2']]))

order.for.anova <- c(which(rownames.anova == 'age_at_t0'),
                     which(rownames.anova == 'month'),
                     which(rownames.anova == 'cum_depo'),
                     which(rownames.anova == 'year_of_t0'),
                     which(rownames.anova == 'baseline_cd4_count_b'),
                     which(rownames.anova == 'log10_baseline_vl'),
                     which(rownames.anova == 'prior_hcp_t0'),
                     which(rownames.anova == 'prior_non_ade.factor'),
                     which(rownames.anova == 'race_2level.factor'),
                     which(rownames.anova == 'route_of_infection_combined.factor'),
                     which(rownames.anova == 'depo_ind.factor'),
                     which(rownames.anova == 'prior_hcv_t0.factor'))
nade_dth_depo_p <- c(anova(nade_dth_depo[['msm2']])[order.for.anova,'P'])

nade_dth_depo_p <- ifelse(nade_dth_depo_p < 0.001, '< 0.001', ifelse(nade_dth_depo_p >= 0.001 & nade_dth_depo_p < 0.01,
                                                                     format(round(nade_dth_depo_p,3),nsmall=2),
                                                   ifelse(nade_dth_depo_p >= 0.01, format(round(nade_dth_depo_p,2),nsmall=2),'')))
nade_dth_depo_p <- ifelse(is.na(nade_dth_depo_p),'', nade_dth_depo_p)


mh_nade_dth_depo <- by_outcomes_with_depo(max_month = 'max_month_for_mh_nade_dth_outcome',
                           out = 'mh_nade_dth_outcome_indicator',
                           nade_var = 'prior_mh_non_ade.factor', time.df)
mh_nade_dth.depo.df <- subset(time.df, month <= max_month_for_mh_nade_dth_outcome)
dd <- datadist(mh_nade_dth.depo.df)
options(datadist = 'dd')
j <- mh_nade_dth_depo[['msm2']]

mh_nade_dth.depo.summ <- summary(j, race_2level.factor = 'White',
                     route_of_infection_combined = 'Non-IDU',
                     prior_hcv_t0.factor = 'No',depo_ind.factor = 'No',
                     year_of_t0 = c(2000, 2003),month=c(10,16),
                     age_at_t0 = c(35,30),cum_depo = c(0,1),baseline_cd4_count_b = c(500,600),
                     log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade.factor = 'No')
mh_nade_dth.depo.summ2 <- summary(j, race_2level.factor = 'White',
                     route_of_infection_combined = 'Non-IDU',
                     prior_hcv_t0.factor = 'No',depo_ind.factor = 'No',
                     year_of_t0 = c(2000, 2003),month=c(10,16),
                     age_at_t0 = c(35,40),cum_depo = c(0,1),baseline_cd4_count_b = c(500,600),
                     log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade.factor = 'No')
mh_nade_dth.depo.summ3 <- summary(j, race_2level.factor = 'White',
                     route_of_infection_combined = 'Non-IDU',
                     prior_hcv_t0.factor = 'No',depo_ind.factor = 'No',
                     year_of_t0 = c(2000, 2003),month=c(10,16),
                     age_at_t0 = c(35,50),cum_depo = c(0,1),baseline_cd4_count_b = c(500,600),
                     log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade.factor = 'No')


order.for.summ <- c(which(rownames(mh_nade_dth.depo.summ) == 'age_at_t0'),
                    which(rownames(mh_nade_dth.depo.summ) == 'month'),
                    which(rownames(mh_nade_dth.depo.summ) == 'cum_depo'),
                    which(rownames(mh_nade_dth.depo.summ) == 'year_of_t0'),
                    which(rownames(mh_nade_dth.depo.summ) == 'baseline_cd4_count_b'),
                    which(rownames(mh_nade_dth.depo.summ) == 'log10_baseline_vl'),
                    which(rownames(mh_nade_dth.depo.summ) == 'prior_hcp_t0'),
                    which(rownames(mh_nade_dth.depo.summ) == 'prior_mh_non_ade.factor - Yes:No'),
                    which(rownames(mh_nade_dth.depo.summ) == 'race_2level.factor - Non-white:White'),
                    which(rownames(mh_nade_dth.depo.summ) == 'route_of_infection_combined.factor - IDU:Non-IDU'),
                    which(rownames(mh_nade_dth.depo.summ) == 'depo_ind.factor - Yes:No'),
                    which(rownames(mh_nade_dth.depo.summ) == 'prior_hcv_t0.factor - Yes:No'))
order.for.summ <- order.for.summ + 1
mh_nade_dth_depo_or <- format(round(mh_nade_dth.depo.summ[order.for.summ,'Effect'],2),nsmall=2)
mh_nade_dth_depo_or2 <- format(round(mh_nade_dth.depo.summ2[2,'Effect'],2),nsmall=2)
mh_nade_dth_depo_or3 <- format(round(mh_nade_dth.depo.summ3[2,'Effect'],2),nsmall=2)

mh_nade_dth_depo_ci <- paste('(',format(round(mh_nade_dth.depo.summ[order.for.summ,'Lower 0.95'],2),nsmall=2),
                 ', ',format(round(mh_nade_dth.depo.summ[order.for.summ,'Upper 0.95'],2),nsmall=2),')',sep='')

mh_nade_dth_depo_ci2 <- paste('(', format(round(mh_nade_dth.depo.summ2[2,'Lower 0.95'],2),nsmall=2),
                 ', ',format(round(mh_nade_dth.depo.summ2[2,'Upper 0.95'],2),nsmall=2),
                 ')',sep='')
mh_nade_dth_depo_ci3 <- paste('(', format(round(mh_nade_dth.depo.summ3[2,'Lower 0.95'],2),nsmall=2),
                 ', ',format(round(mh_nade_dth.depo.summ3[2,'Upper 0.95'],2),nsmall=2),
                 ')',sep='')

rownames.anova <- rownames(anova(mh_nade_dth_depo[['msm2']]))

order.for.anova <- c(which(rownames.anova == 'age_at_t0'),
                     which(rownames.anova == 'month'),
                     which(rownames.anova == 'cum_depo'),
                     which(rownames.anova == 'year_of_t0'),
                     which(rownames.anova == 'baseline_cd4_count_b'),
                     which(rownames.anova == 'log10_baseline_vl'),
                     which(rownames.anova == 'prior_hcp_t0'),
                     which(rownames.anova == 'prior_mh_non_ade.factor'),
                     which(rownames.anova == 'race_2level.factor'),
                     which(rownames.anova == 'route_of_infection_combined.factor'),
                     which(rownames.anova == 'depo_ind.factor'),
                     which(rownames.anova == 'prior_hcv_t0.factor'))
mh_nade_dth_depo_p <- c(anova(mh_nade_dth_depo[['msm2']])[order.for.anova,'P'])

mh_nade_dth_depo_p <- ifelse(mh_nade_dth_depo_p < 0.001, '< 0.001', ifelse(mh_nade_dth_depo_p >= 0.001 & mh_nade_dth_depo_p < 0.01,
                                                                 format(round(mh_nade_dth_depo_p,3),nsmall=2),
                                                   ifelse(mh_nade_dth_depo_p >= 0.01,
                                                          format(round(mh_nade_dth_depo_p,2),nsmall=2),'')))
mh_nade_dth_depo_p <- ifelse(is.na(mh_nade_dth_depo_p),'', mh_nade_dth_depo_p)

#------------------------------------------#
mh_nade_only_dth_depo <- by_outcomes_with_depo(max_month = 'max_month_for_mh_nade_only_dth_outcome',
                           out = 'mh_nade_only_dth_outcome_indicator',
                           nade_var = 'prior_mh_non_ade_only.factor', time.df)
mh_nade_only_dth.depo.df <- subset(time.df, month <= max_month_for_mh_nade_only_dth_outcome)
dd <- datadist(mh_nade_only_dth.depo.df)
options(datadist = 'dd')
j <- mh_nade_only_dth_depo[['msm2']]

mh_nade_only_dth.depo.summ <- summary(j, race_2level.factor = 'White',
                     route_of_infection_combined = 'Non-IDU',
                     prior_hcv_t0.factor = 'No',depo_ind.factor = 'No',
                     year_of_t0 = c(2000, 2003),month=c(10,16),
                     age_at_t0 = c(35,30),cum_depo = c(0,1),baseline_cd4_count_b = c(500,600),
                     log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade_only.factor = 'No')
mh_nade_only_dth.depo.summ2 <- summary(j, race_2level.factor = 'White',
                     route_of_infection_combined = 'Non-IDU',
                     prior_hcv_t0.factor = 'No',depo_ind.factor = 'No',
                     year_of_t0 = c(2000, 2003),month=c(10,16),
                     age_at_t0 = c(35,40),cum_depo = c(0,1),baseline_cd4_count_b = c(500,600),
                     log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade_only.factor = 'No')
mh_nade_only_dth.depo.summ3 <- summary(j, race_2level.factor = 'White',
                     route_of_infection_combined = 'Non-IDU',
                     prior_hcv_t0.factor = 'No',depo_ind.factor = 'No',
                     year_of_t0 = c(2000, 2003),month=c(10,16),
                     age_at_t0 = c(35,50),cum_depo = c(0,1),baseline_cd4_count_b = c(500,600),
                     log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade_only.factor = 'No')


order.for.summ <- c(which(rownames(mh_nade_only_dth.depo.summ) == 'age_at_t0'),
                    which(rownames(mh_nade_only_dth.depo.summ) == 'month'),
                    which(rownames(mh_nade_only_dth.depo.summ) == 'cum_depo'),
                    which(rownames(mh_nade_only_dth.depo.summ) == 'year_of_t0'),
                    which(rownames(mh_nade_only_dth.depo.summ) == 'baseline_cd4_count_b'),
                    which(rownames(mh_nade_only_dth.depo.summ) == 'log10_baseline_vl'),
                    which(rownames(mh_nade_only_dth.depo.summ) == 'prior_hcp_t0'),
                    which(rownames(mh_nade_only_dth.depo.summ) == 'prior_mh_non_ade_only.factor - Yes:No'),
                    which(rownames(mh_nade_only_dth.depo.summ) == 'race_2level.factor - Non-white:White'),
                    which(rownames(mh_nade_only_dth.depo.summ) == 'route_of_infection_combined.factor - IDU:Non-IDU'),
                    which(rownames(mh_nade_only_dth.depo.summ) == 'depo_ind.factor - Yes:No'),
                    which(rownames(mh_nade_only_dth.depo.summ) == 'prior_hcv_t0.factor - Yes:No'))
order.for.summ <- order.for.summ + 1
mh_nade_only_dth_depo_or <- format(round(mh_nade_only_dth.depo.summ[order.for.summ,'Effect'],2),nsmall=2)
mh_nade_only_dth_depo_or2 <- format(round(mh_nade_only_dth.depo.summ2[2,'Effect'],2),nsmall=2)
mh_nade_only_dth_depo_or3 <- format(round(mh_nade_only_dth.depo.summ3[2,'Effect'],2),nsmall=2)

mh_nade_only_dth_depo_ci <- paste('(',format(round(mh_nade_only_dth.depo.summ[order.for.summ,'Lower 0.95'],2),nsmall=2),
                 ', ',format(round(mh_nade_only_dth.depo.summ[order.for.summ,'Upper 0.95'],2),nsmall=2),')',sep='')

mh_nade_only_dth_depo_ci2 <- paste('(', format(round(mh_nade_only_dth.depo.summ2[2,'Lower 0.95'],2),nsmall=2),
                 ', ',format(round(mh_nade_only_dth.depo.summ2[2,'Upper 0.95'],2),nsmall=2),')',sep='')
mh_nade_only_dth_depo_ci3 <- paste('(', format(round(mh_nade_only_dth.depo.summ3[2,'Lower 0.95'],2),nsmall=2),
                 ', ',format(round(mh_nade_only_dth.depo.summ3[2,'Upper 0.95'],2),nsmall=2),')',sep='')

rownames.anova <- rownames(anova(mh_nade_only_dth_depo[['msm2']]))

order.for.anova <- c(which(rownames.anova == 'age_at_t0'),
                     which(rownames.anova == 'month'),
                     which(rownames.anova == 'cum_depo'),
                     which(rownames.anova == 'year_of_t0'),
                     which(rownames.anova == 'baseline_cd4_count_b'),
                     which(rownames.anova == 'log10_baseline_vl'),
                     which(rownames.anova == 'prior_hcp_t0'),
                     which(rownames.anova == 'prior_mh_non_ade_only.factor'),
                     which(rownames.anova == 'race_2level.factor'),
                     which(rownames.anova == 'route_of_infection_combined.factor'),
                     which(rownames.anova == 'depo_ind.factor'),
                     which(rownames.anova == 'prior_hcv_t0.factor'))
mh_nade_only_dth_depo_p <- c(anova(mh_nade_only_dth_depo[['msm2']])[order.for.anova,'P'])

mh_nade_only_dth_depo_p <- ifelse(mh_nade_only_dth_depo_p < 0.001, '< 0.001', ifelse(mh_nade_only_dth_depo_p >= 0.001 &
                                  mh_nade_only_dth_depo_p < 0.01,format(round(mh_nade_only_dth_depo_p,3),nsmall=2),
                                                   ifelse(mh_nade_only_dth_depo_p >= 0.01,
                                                          format(round(mh_nade_only_dth_depo_p,2),nsmall=2),'')))
mh_nade_only_dth_depo_p <- ifelse(is.na(mh_nade_only_dth_depo_p),'', mh_nade_only_dth_depo_p)


mh_nade_no_sa_dth_depo <- by_outcomes_with_depo(max_month = 'max_month_for_mh_nade_no_sa_dth_outcome',
                           out = 'mh_nade_no_sa_dth_outcome_indicator',
                           nade_var = 'prior_mh_non_ade_no_sa.factor', time.df)
mh_nade_no_sa_dth.depo.df <- subset(time.df, month <= max_month_for_mh_nade_no_sa_dth_outcome)
dd <- datadist(mh_nade_no_sa_dth.depo.df)
options(datadist = 'dd')
j <- mh_nade_no_sa_dth_depo[['msm2']]

mh_nade_no_sa_dth.depo.summ <- summary(j, race_2level.factor = 'White',
                     route_of_infection_combined = 'Non-IDU',
                     prior_hcv_t0.factor = 'No',depo_ind.factor = 'No',
                     year_of_t0 = c(2000, 2003),month=c(10,16),
                     age_at_t0 = c(35,30),cum_depo = c(0,1),baseline_cd4_count_b = c(500,600),
                     log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade_no_sa.factor = 'No')
mh_nade_no_sa_dth.depo.summ2 <- summary(j, race_2level.factor = 'White',
                     route_of_infection_combined = 'Non-IDU',
                     prior_hcv_t0.factor = 'No',depo_ind.factor = 'No',
                     year_of_t0 = c(2000, 2003),month=c(10,16),
                     age_at_t0 = c(35,40),cum_depo = c(0,1),baseline_cd4_count_b = c(500,600),
                     log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade_no_sa.factor = 'No')
mh_nade_no_sa_dth.depo.summ3 <- summary(j, race_2level.factor = 'White',
                     route_of_infection_combined = 'Non-IDU',
                     prior_hcv_t0.factor = 'No',depo_ind.factor = 'No',
                     year_of_t0 = c(2000, 2003),month=c(10,16),
                     age_at_t0 = c(35,50),cum_depo = c(0,1),baseline_cd4_count_b = c(500,600),
                     log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade_no_sa.factor = 'No')


order.for.summ <- c(which(rownames(mh_nade_no_sa_dth.depo.summ) == 'age_at_t0'),
                    which(rownames(mh_nade_no_sa_dth.depo.summ) == 'month'),
                    which(rownames(mh_nade_no_sa_dth.depo.summ) == 'cum_depo'),
                    which(rownames(mh_nade_no_sa_dth.depo.summ) == 'year_of_t0'),
                    which(rownames(mh_nade_no_sa_dth.depo.summ) == 'baseline_cd4_count_b'),
                    which(rownames(mh_nade_no_sa_dth.depo.summ) == 'log10_baseline_vl'),
                    which(rownames(mh_nade_no_sa_dth.depo.summ) == 'prior_hcp_t0'),
                    which(rownames(mh_nade_no_sa_dth.depo.summ) == 'prior_mh_non_ade_no_sa.factor - Yes:No'),
                    which(rownames(mh_nade_no_sa_dth.depo.summ) == 'race_2level.factor - Non-white:White'),
                    which(rownames(mh_nade_no_sa_dth.depo.summ) == 'route_of_infection_combined.factor - IDU:Non-IDU'),
                    which(rownames(mh_nade_no_sa_dth.depo.summ) == 'depo_ind.factor - Yes:No'),
                    which(rownames(mh_nade_no_sa_dth.depo.summ) == 'prior_hcv_t0.factor - Yes:No'))
order.for.summ <- order.for.summ + 1
mh_nade_no_sa_dth_depo_or <- format(round(mh_nade_no_sa_dth.depo.summ[order.for.summ,'Effect'],2),nsmall=2)
mh_nade_no_sa_dth_depo_or2 <- format(round(mh_nade_no_sa_dth.depo.summ2[2,'Effect'],2),nsmall=2)
mh_nade_no_sa_dth_depo_or3 <- format(round(mh_nade_no_sa_dth.depo.summ3[2,'Effect'],2),nsmall=2)

mh_nade_no_sa_dth_depo_ci <- paste('(',format(round(mh_nade_no_sa_dth.depo.summ[order.for.summ,'Lower 0.95'],2),nsmall=2),
                 ', ',format(round(mh_nade_no_sa_dth.depo.summ[order.for.summ,'Upper 0.95'],2),nsmall=2),')',sep='')

mh_nade_no_sa_dth_depo_ci2 <- paste('(', format(round(mh_nade_no_sa_dth.depo.summ2[2,'Lower 0.95'],2),nsmall=2),
                 ', ',format(round(mh_nade_no_sa_dth.depo.summ2[2,'Upper 0.95'],2),nsmall=2),')',sep='')
mh_nade_no_sa_dth_depo_ci3 <- paste('(', format(round(mh_nade_no_sa_dth.depo.summ3[2,'Lower 0.95'],2),nsmall=2),
                 ', ',format(round(mh_nade_no_sa_dth.depo.summ3[2,'Upper 0.95'],2),nsmall=2),')',sep='')

rownames.anova <- rownames(anova(mh_nade_no_sa_dth_depo[['msm2']]))

order.for.anova <- c(which(rownames.anova == 'age_at_t0'),
                     which(rownames.anova == 'month'),
                     which(rownames.anova == 'cum_depo'),
                     which(rownames.anova == 'year_of_t0'),
                     which(rownames.anova == 'baseline_cd4_count_b'),
                     which(rownames.anova == 'log10_baseline_vl'),
                     which(rownames.anova == 'prior_hcp_t0'),
                     which(rownames.anova == 'prior_mh_non_ade_no_sa.factor'),
                     which(rownames.anova == 'race_2level.factor'),
                     which(rownames.anova == 'route_of_infection_combined.factor'),
                     which(rownames.anova == 'depo_ind.factor'),
                     which(rownames.anova == 'prior_hcv_t0.factor'))
mh_nade_no_sa_dth_depo_p <- c(anova(mh_nade_no_sa_dth_depo[['msm2']])[order.for.anova,'P'])

mh_nade_no_sa_dth_depo_p <- ifelse(mh_nade_no_sa_dth_depo_p < 0.001, '< 0.001', ifelse(mh_nade_no_sa_dth_depo_p >= 0.001 &
                                  mh_nade_no_sa_dth_depo_p < 0.01,format(round(mh_nade_no_sa_dth_depo_p,3),nsmall=2),
                                                   ifelse(mh_nade_no_sa_dth_depo_p >= 0.01,
                                                          format(round(mh_nade_no_sa_dth_depo_p,2),nsmall=2),'')))
mh_nade_no_sa_dth_depo_p <- ifelse(is.na(mh_nade_no_sa_dth_depo_p),'', mh_nade_no_sa_dth_depo_p)


# Just need a simple model with death as the outcome
df <- subset(time.df, month <= max_month_for_death_outcome)
df <- time.df[which(time.df$month <= time.df[['max_month_for_death_outcome']]),]
wt.mod.denom <- glm(depo_ind.factor ~ age_at_t0 + rcs(month,4) + race_2level.factor + rcs(cd4_count_b,3) +
                                       (1-undetectable_vl)*log10_hiv1_rna_b + haart_ind + reg_indicator + route_of_infection_combined.factor +
                                       prior_hcv_t0.factor + year_of_t0 + ade_indicator + prior_hcp_t0 + prior_non_ade.factor,
                       data=df, subset=pregnancy_indicator == 0, family='binomial')
denom <- exp(predict(wt.mod.denom))/(1+exp(predict(wt.mod.denom)))

#plus time-invariant covariates from above
wt.mod.num <- glm(hcp_ind.factor ~ rcs(month,4), data=df,
                  subset=pregnancy_indicator == 0 & !is.na(cd4_count_b) & !is.na(log10_hiv1_rna_b),family='binomial')
num <- exp(predict(wt.mod.num))/(1+exp(predict(wt.mod.num)))

wt <- ifelse(df$hcp_ind[which(df$pregnancy_indicator == 0 & !is.na(df$cd4_count_b) & !is.na(df$log10_hiv1_rna_b))] == 1,
             num/denom, (1-num)/(1-denom))

quant.wt <- quantile(wt, probs=c(0.025,0.975))
wt <- ifelse(wt < quant.wt[1],quant.wt[1],
             ifelse(wt > quant.wt[2],quant.wt[2],wt))

outcome <- df$death_outcome_indicator[which(df$pregnancy_indicator == 0 &
                                            !is.na(df$cd4_count_b) &
                                            !is.na(df$log10_hiv1_rna_b))]
depo_ind.factor <- df$depo_ind.factor[which(df$pregnancy_indicator == 0 &
                                                !is.na(df$cd4_count_b) &
                                                !is.na(df$log10_hiv1_rna_b))]

cum_depo <- df$cum_depo[which(df$pregnancy_indicator == 0 &
                                    !is.na(df$cd4_count_b) &
                                    !is.na(df$log10_hiv1_rna_b))]
pregnancy_indicator <- df$pregnancy_indicator[which(df$pregnancy_indicator == 0 &
                                                    !is.na(df$cd4_count_b) & !is.na(df$log10_hiv1_rna_b))]

msm.depo <- glm(outcome ~ depo_ind.factor + cum_depo, weights=wt,
                        family='quasibinomial',subset=pregnancy_indicator == 0)

outcome2 <- factor(outcome)
dd <<- datadist(df)
options(datadist='dd')

msm2.depo <- lrm(outcome2 ~ depo_ind.factor + cum_depo,weights=wt,
               subset=pregnancy_indicator == 0,tol=-10)

death.simple.depo.summ <- summary(msm2.depo, depo_ind.factor = 'No',cum_depo = c(0,1))

death_simple_depo_or <- format(round(death.simple.depo.summ[which(rownames(death.simple.depo.summ) == ' Odds Ratio'),'Effect'],2),nsmall=2)
death_simple_depo_ci <- paste('(', format(round(death.simple.depo.summ[which(rownames(death.simple.depo.summ) == ' Odds Ratio'),
                                           'Lower 0.95'],2),nsmall=2),
                 ', ',format(round(death.simple.depo.summ[which(rownames(death.simple.depo.summ) == ' Odds Ratio'),
                                   'Upper 0.95'],2),nsmall=2),')',sep='')
rownames.anova <- rownames(anova(msm2.depo))
order.for.anova <- c(which(rownames.anova == 'cum_depo'),
                     which(rownames.anova == 'depo_ind.factor'))
death_simple_depo_p <- c(anova(msm2.depo)[order.for.anova[1:2],'P'])
death_simple_depo_p <- ifelse(death_simple_depo_p < 0.001, '< 0.001', ifelse(death_simple_depo_p >= 0.001 & death_simple_depo_p < 0.01,
                                                                   round(death_simple_depo_p,3),
                                                   ifelse(death_simple_depo_p >= 0.01, round(death_simple_depo_p,2),'')))
death_simple_depo_p <- ifelse(is.na(death_simple_depo_p),'', death_simple_depo_p)


# Frequency table of NADEs
mh_nade_long$last_age <- time.df[match(mh_nade_long$cfar_pid,time.df$cfar_pid,nomatch=NA),'last_age']
sub_mh_nade_long <- subset(mh_nade_long, age_mh_nade >= age_at_t0 & age_mh_nade <= last_age)


params <- grep('age_at_mh',names(mh_nade),value=TRUE)
mh_nade_long2 <- na.omit(reshape(mh_nade,params,v.names='age',timevar='event',idvar='cfar_pid',direction='long',sep=',',
                                drop=setdiff(names(mh_nade),c('cfar_pid',params))))

params <- grep('mh_non_ade_after',names(mh_nade),value=TRUE)
mh_nade$mh_non_ade_after_t01 <- ifelse(mh_nade$mh_non_ade_after_t01 == '',NA,mh_nade$mh_non_ade_after_t01)
mh_nade$mh_non_ade_after_t02 <- ifelse(mh_nade$mh_non_ade_after_t02 == '',NA,mh_nade$mh_non_ade_after_t02)
mh_nade$mh_non_ade_after_t03 <- ifelse(mh_nade$mh_non_ade_after_t03 == '',NA,mh_nade$mh_non_ade_after_t03)
mh_nade$mh_non_ade_after_t04 <- ifelse(mh_nade$mh_non_ade_after_t04 == '',NA,mh_nade$mh_non_ade_after_t04)
mh_nade$mh_non_ade_after_t05 <- ifelse(mh_nade$mh_non_ade_after_t05 == '',NA,mh_nade$mh_non_ade_after_t05)
mh_nade$mh_non_ade_after_t06 <- ifelse(mh_nade$mh_non_ade_after_t06 == '',NA,mh_nade$mh_non_ade_after_t06)

nade_event <- na.omit(reshape(mh_nade,params,v.names='nade',timevar='event',idvar='cfar_pid',direction='long',sep=',',
                              drop=setdiff(names(mh_nade),c('cfar_pid',params))))
mh_nade_long2$pid.event <- paste(mh_nade_long2$cfar_pid,mh_nade_long2$event,sep='.')
nade_event$pid.event <- paste(nade_event$cfar_pid,nade_event$event,sep='.')


# Two NADEs listed without ages so must remove them
nade_event <- nade_event[which(nade_event$pid.event %in% intersect(mh_nade_long2$pid.event,nade_event$pid.event)),]

mh_nade_long2$nade <- nade_event[match(mh_nade_long2$pid.event,nade_event$pid.event,nomatch=NA),'nade']


mh_nade$mh_non_ade_category1 <- ifelse(mh_nade$mh_non_ade_category1 == '',NA,mh_nade$mh_non_ade_category1)
mh_nade$mh_non_ade_category2 <- ifelse(mh_nade$mh_non_ade_category2 == '',NA,mh_nade$mh_non_ade_category2)
mh_nade$mh_non_ade_category3 <- ifelse(mh_nade$mh_non_ade_category3 == '',NA,mh_nade$mh_non_ade_category3)
mh_nade$mh_non_ade_category4 <- ifelse(mh_nade$mh_non_ade_category4 == '',NA,mh_nade$mh_non_ade_category4)
mh_nade$mh_non_ade_category5 <- ifelse(mh_nade$mh_non_ade_category5 == '',NA,mh_nade$mh_non_ade_category5)
mh_nade$mh_non_ade_category6 <- ifelse(mh_nade$mh_non_ade_category6 == '',NA,mh_nade$mh_non_ade_category6)

params <- grep('mh_non_ade_category',names(mh_nade),value=TRUE)
nade_category <- na.omit(reshape(mh_nade,params,v.names='nade_category',timevar='event',idvar='cfar_pid',direction='long',sep=',',
                                 drop=setdiff(names(mh_nade),c('cfar_pid',params))))
nade_category$pid.event <- paste(nade_category$cfar_pid,nade_category$event,sep='.')
nade_category <- nade_category[which(nade_category$pid.event %in% intersect(mh_nade_long2$pid.event,nade_category$pid.event)),]

mh_nade_long2$nade_category <- nade_category[match(mh_nade_long2$pid.event,nade_category$pid.event,nomatch=NA),'nade_category']


mh_nade_long2 <- subset(mh_nade_long2, nade_category != 'SUBSTANCE ABUSE')


mh_nade_long2$age_at_t0 <- mh_nade[match(mh_nade_long2$cfar_pid,mh_nade$cfar_pid,nomatch=NA),'age_at_t0']
mh_nade_long2$age_after_year_gap <- mh_nade[match(mh_nade_long2$cfar_pid,mh_nade$cfar_pid,nomatch=NA),'age_after_year_gap']
mh_nade_long2$last_age <- time.df[match(mh_nade_long2$cfar_pid, time.df$cfar_pid, nomatch=NA),'last_age']

mh_nade_long2 <- subset(mh_nade_long2, age >= age_at_t0 & age <= last_age & cfar_pid %in% unique(time.df$cfar_pid))
mh_nade_long2$max_number_months_followed <- time.df[match(mh_nade_long2$cfar_pid, time.df$cfar_pid, nomatch=NA),'max_number_months_followed']
mh_nade_long2$max_number_years_followed <- mh_nade_long2$max_number_months_followed/12


mh_nade_long2$month <- with(mh_nade_long2, ceiling((age - age_at_t0)*12))

mh_nade_long2$pid.month <- paste(mh_nade_long2$cfar_pid, mh_nade_long2$month, sep='.')
time.df$nade_name <- mh_nade_long2[match(time.df$pid.month,mh_nade_long2$pid.month,nomatch=NA),'nade']
time.df$nade_category <- mh_nade_long2[match(time.df$pid.month,mh_nade_long2$pid.month,nomatch=NA),'nade_category']
dup.ids.month <- mh_nade_long2[duplicated(mh_nade_long2$pid.month),'pid.month']
mh_nade_long2$duplicated_pid.month <- ifelse(mh_nade_long2$pid.month %in% dup.ids.month,1,0)
dup.records <- mh_nade_long2[duplicated(mh_nade_long2$pid.month),]


time.df$month_of_first_nade <- with(time.df, ceiling((age_of_first_nade - age_at_t0)*12))
time.df$month_of_first_mh_nade_only <- with(time.df, ceiling((age_of_first_mh_nade_only - age_at_t0)*12))
time.df$month_of_first_mh_nade_no_sa <- with(time.df, ceiling((age_of_first_mh_nade_no_sa - age_at_t0)*12))
time.df$duplicated_pid.month <- mh_nade_long2[match(time.df$pid.month,mh_nade_long2$pid.month,nomatch=NA),'duplicated_pid.month']
time.df$nade_category2 <- dup.records[match(time.df$pid.month,dup.records$pid.month,nomatch=NA),'nade_category']
time.df$nade_name2 <- dup.records[match(time.df$pid.month,dup.records$pid.month,nomatch=NA),'nade']

time.df$nade_category_new <- ifelse(time.df$nade_category %in% c('CARDIOVASCULAR','CEREBROVASCULAR'),'CARDIOVASCULAR OR CEREBROVASCULAR',
                                    time.df$nade_category)
time.df$nade_category2_new <- ifelse(time.df$nade_category2 %in% c('CARDIOVASCULAR','CEREBROVASCULAR'),'CARDIOVASCULAR OR CEREBROVASCULAR',
                                     time.df$nade_category2)


incidence_function <- function(df,nade_of_interest){
   # Event data
   evt_df <- subset(df, nade_category_new %in% nade_of_interest | nade_category2_new %in% nade_of_interest)
   evt_df <- evt_df[order(evt_df$cfar_pid,evt_df$age),]

   evt_df$nade_category_new <- ifelse(evt_df$nade_category_new %nin% nade_of_interest,NA,evt_df$nade_category_new)
   evt_df$nade_category2_new <- ifelse(evt_df$nade_category2_new %nin% nade_of_interest,NA,evt_df$nade_category2_new)


   # Counting ALL events, not just the first one
   tot_evts_hc <- length(which(evt_df$hcp_ind == 1))
   tot_evts_no_hc <- length(which(evt_df$hcp_ind == 0))
   evt_df <- evt_df[!duplicated(evt_df$cfar_pid),]
   evt_df$cfar_pid <- factor(evt_df$cfar_pid)
   # Data with no event of interest
   no_evt_df <- subset(df, cfar_pid %nin% evt_df$cfar_pid)
   no_evt_df$cfar_pid <- factor(no_evt_df$cfar_pid)

   # -----  Find the max cumulative time not on HCP   ---- #
   # In the event data
   evt_df$max_cum_no_hcp <- NA
   split(evt_df$max_cum_no_hcp, evt_df$cfar_pid) <- lapply(split(evt_df$cum_no_hcp_modified,evt_df$cfar_pid),FUN=function(y){
                                                           return(max(y)) })

   # In the non-event data
   no_evt_df$max_cum_no_hcp <- NA
   split(no_evt_df$max_cum_no_hcp,no_evt_df$cfar_pid) <- lapply(split(no_evt_df$cum_no_hcp_modified,no_evt_df$cfar_pid),FUN=function(y){
                                                                        return(max(y)) })

   # -----  Find the max cumulative time on HCP   ---- #
   # In the non-event data
   no_evt_df$max_cum_hcp <- NA
   split(no_evt_df$max_cum_hcp, no_evt_df$cfar_pid) <- lapply(split(no_evt_df$new_cum_hcp,no_evt_df$cfar_pid),FUN=function(y){
                                                                      return(max(y)) })

   # Calculating incidences of nade of interest
   if(nade_of_interest[1] == 'DEATH'){
      num_evt_hc <- length(which(evt_df$hcp_ind == 1))
   } else {
      num_evt_hc <- length(which(!is.na(evt_df$nade_category_new) & evt_df$hcp_ind == 1)) +
                    length(which(!is.na(evt_df$nade_category2_new[which(evt_df$nade_category2_new != 'DEATH')]) &
                    evt_df$hcp_ind[which(evt_df$nade_category2_new != 'DEATH')] == 1))
   }

    fu_on_hc <- sum(evt_df$new_cum_hcp) + sum(no_evt_df$max_cum_hcp[!duplicated(no_evt_df$cfar_pid)])


#    sub_no_evt_df <- subset(no_evt_df, hcp_ind == 0)
   if(nade_of_interest[1] == 'DEATH'){
      num_evt_no_hc <- length(which(evt_df$hcp_ind == 0))
   } else {
      num_evt_no_hc <- length(which(!is.na(evt_df$nade_category_new) & evt_df$hcp_ind == 0)) +
                       length(which(!is.na(evt_df$nade_category2_new[which(evt_df$nade_category2_new != 'DEATH')]) &
                              evt_df$hcp_ind[which(evt_df$nade_category2_new != 'DEATH')] == 0))
   }

   fu_no_hc <- sum(evt_df$max_cum_no_hcp) + sum(no_evt_df$max_cum_no_hcp[!duplicated(no_evt_df$cfar_pid)])

   # For those on HC (number of nade_of_interest while on HC/(sum of time on HC for those with event and those without event)
   incidence_hc <- format(round(num_evt_hc/fu_on_hc*1000,2),nsmall=2)

   incidence_no_hc <- format(round(num_evt_no_hc/fu_no_hc*1000,2), nsmall=2)


   fu_on_hc <- format(round(fu_on_hc,2),nsmall=2)
   fu_no_hc <- format(round(fu_no_hc,2),nsmall=2)

   return(list(incidence_hc=incidence_hc,
               num_evt_hc = num_evt_hc,
               tot_evts_hc = tot_evts_hc,
               fu_on_hc = fu_on_hc,
               incidence_no_hc=incidence_no_hc,
               num_evt_no_hc = num_evt_no_hc,
               tot_evts_no_hc = tot_evts_no_hc,
               fu_no_hc = fu_no_hc))
}


cardio_incidence <- incidence_function(time.df,nade_of_interest='CARDIOVASCULAR OR CEREBROVASCULAR')
hepatic_incidence <- incidence_function(time.df,nade_of_interest='HEPATIC')
mh_incidence <- incidence_function(time.df, nade_of_interest='MENTAL HEALTH')
metabolic_incidence <- incidence_function(time.df, nade_of_interest='METABOLIC')
renal_incidence <- incidence_function(time.df, nade_of_interest='RENAL')
any_nade_incidence <- incidence_function(time.df, nade_of_interest=c('CARDIOVASCULAR OR CEREBROVASCULAR','HEPATIC','METABOLIC','MALIGNANCY','RENAL'))
any_mh_nade_no_sa_incidence <- incidence_function(time.df, nade_of_interest=c('CARDIOVASCULAR OR CEREBROVASCULAR','HEPATIC','MENTAL HEALTH','METABOLIC','MALIGNANCY','RENAL'))

# Calculating incidence rates for death.  Will fill in nade_category_new with DEATH; for the subject who had a value at nade_category_new, will put it in nade_category2_new
which.rows <- which(time.df$month == time.df$month_at_death)
time.df$nade_category[which.rows] <- ifelse(is.na(time.df$nade_category[which.rows]),'DEATH',time.df$nade_category[which.rows])
time.df$nade_category_new[which.rows] <- ifelse(is.na(time.df$nade_category_new[which.rows]),'DEATH',time.df$nade_category_new[which.rows])
time.df$nade_category2_new[which.rows] <- ifelse(is.na(time.df$nade_category2_new[which.rows]),'DEATH',time.df$nade_category2_new[which.rows])
time.df$nade_category2_new <- ifelse(!is.na(time.df$nade_category_new) & time.df$nade_category_new == 'DEATH',NA,
                                     time.df$nade_category2_new)
death_incidence <- incidence_function(time.df, nade_of_interest='DEATH')


# Alternate way of computing IRR
incidence_function2 <- function(df,nade_of_interest){
   evt_df <- subset(df, nade_category_new %in% nade_of_interest | nade_category2_new %in% nade_of_interest)
   evt_df <- evt_df[order(evt_df$cfar_pid,evt_df$age),]
   evt_df$nade_category_new <- ifelse(evt_df$nade_category_new %nin% nade_of_interest,NA,evt_df$nade_category_new)
   evt_df$nade_category2_new <- ifelse(evt_df$nade_category2_new %nin% nade_of_interest,NA,evt_df$nade_category2_new)

   which.duplicated <- which(duplicated(evt_df$cfar_pid))


   num_evt_hc <- length(which(!is.na(evt_df$nade_category_new) & evt_df$hcp_ind == 1)) +
                 length(which(!is.na(evt_df$nade_category2_new) & evt_df$hcp_ind == 1))

   fu_on_hc <- sum(df$max_cum_hcp[!duplicated(df$cfar_pid)])
   incidence_hc <- format(round(num_evt_hc/fu_on_hc*1000,2),nsmall=2)

   num_evt_no_hc <- length(which(!is.na(evt_df$nade_category_new) & evt_df$hcp_ind == 0)) +
                    length(which(!is.na(evt_df$nade_category2_new) & evt_df$hcp_ind == 0))

   fu_no_hc <- sum(df$max_cum_no_hcp[!duplicated(df$cfar_pid)])
   incidence_no_hc <- format(round(num_evt_no_hc/fu_no_hc*1000,2),nsmall=2)

   fu_on_hc <- format(round(fu_on_hc,2),nsmall=2)
   fu_no_hc <- format(round(fu_no_hc,2),nsmall=2)

   return(list(num_evt_hc = num_evt_hc,
               incidence_hc = incidence_hc,
               fu_on_hc = fu_on_hc,
               num_evt_no_hc = num_evt_no_hc,
               incidence_no_hc = incidence_no_hc,
               fu_no_hc = fu_no_hc))
}

time.df$max_cum_hcp <- NA
split(time.df$max_cum_hcp, time.df$cfar_pid) <- lapply(split(time.df$new_cum_hcp,time.df$cfar_pid),FUN=function(y){
                                                   max(y) })

time.df$max_cum_no_hcp <- NA
split(time.df$max_cum_no_hcp, time.df$cfar_pid) <- lapply(split(time.df$cum_no_hcp, time.df$cfar_pid), FUN=function(y){
                                                      max(y) })

cardio_incidence2 <- incidence_function2(time.df,nade_of_interest='CARDIOVASCULAR OR CEREBROVASCULAR')
hepatic_incidence2 <- incidence_function2(time.df,nade_of_interest='HEPATIC')
mh_incidence2 <- incidence_function2(time.df, nade_of_interest='MENTAL HEALTH')
metabolic_incidence2 <- incidence_function2(time.df, nade_of_interest='METABOLIC')
renal_incidence2 <- incidence_function2(time.df, nade_of_interest='RENAL')
any_nade_incidence2 <- incidence_function2(time.df, nade_of_interest=c('CARDIOVASCULAR OR CEREBROVASCULAR','HEPATIC','METABOLIC','MALIGNANCY','RENAL'))
any_mh_nade_no_sa_incidence2 <- incidence_function2(time.df, nade_of_interest=c('CARDIOVASCULAR OR CEREBROVASCULAR','HEPATIC','MENTAL HEALTH','METABOLIC','MALIGNANCY','RENAL'))
death_incidence2 <- incidence_function2(time.df, nade_of_interest='DEATH')


incidence_rate_df <- data.frame('NADE Category'=c('CARDIOVASCULAR OR CEREBROVASCULAR',
                                                  'HEPATIC',
                                                  'MENTAL HEALTH',
                                                  'METABOLIC',
                                                  'RENAL',
                                                  'Any NADE',
                                                  'Any NADE (including mental health)',
                                                  'Death'),
                                'on_hc'=c(paste(cardio_incidence[['incidence_hc']],' (',cardio_incidence[['num_evt_hc']],')',sep=''),
                                              paste(hepatic_incidence[['incidence_hc']],' (',hepatic_incidence[['num_evt_hc']],')',sep=''),
                                              paste(mh_incidence[['incidence_hc']],' (',mh_incidence[['num_evt_hc']],')',sep=''),
                                              paste(metabolic_incidence[['incidence_hc']],' (',metabolic_incidence[['num_evt_hc']],')',sep=''),
                                              paste(renal_incidence[['incidence_hc']],' (',renal_incidence[['num_evt_hc']],')',sep=''),
                                              paste(any_nade_incidence[['incidence_hc']],' (',any_nade_incidence[['num_evt_hc']],')',sep=''),
                                              paste(any_mh_nade_no_sa_incidence[['incidence_hc']],' (',any_mh_nade_no_sa_incidence[['num_evt_hc']],')',sep=''),
                                              paste(death_incidence[['incidence_hc']],' (',death_incidence[['num_evt_hc']],')',sep='')),
                                'not_on_hc'=c(paste(cardio_incidence[['incidence_no_hc']],' (',cardio_incidence[['num_evt_no_hc']],')',sep=''),
                                              paste(hepatic_incidence[['incidence_no_hc']],' (',hepatic_incidence[['num_evt_no_hc']],')',sep=''),
                                              paste(mh_incidence[['incidence_no_hc']],' (',mh_incidence[['num_evt_no_hc']],')',sep=''),
                                              paste(metabolic_incidence[['incidence_no_hc']],' (',metabolic_incidence[['num_evt_no_hc']],')',sep=''),
                                              paste(renal_incidence[['incidence_no_hc']],' (',renal_incidence[['num_evt_no_hc']],')',sep=''),
                                              paste(any_nade_incidence[['incidence_no_hc']],' (',any_nade_incidence[['num_evt_no_hc']],')',sep=''),
                                              paste(any_mh_nade_no_sa_incidence[['incidence_no_hc']],' (',any_mh_nade_no_sa_incidence[['num_evt_no_hc']],')',sep=''),
                                              paste(death_incidence[['incidence_no_hc']],' (',death_incidence[['num_evt_no_hc']],')',sep='')),
                                 'on_hc2'=c(paste(cardio_incidence2[['incidence_hc']],' (',cardio_incidence2[['num_evt_hc']],')',sep=''),
                                              paste(hepatic_incidence2[['incidence_hc']],' (',hepatic_incidence2[['num_evt_hc']],')',sep=''),
                                              paste(mh_incidence2[['incidence_hc']],' (',mh_incidence2[['num_evt_hc']],')',sep=''),
                                              paste(metabolic_incidence2[['incidence_hc']],' (',metabolic_incidence2[['num_evt_hc']],')',sep=''),
                                              paste(renal_incidence2[['incidence_hc']],' (',renal_incidence2[['num_evt_hc']],')',sep=''),
                                              paste(any_nade_incidence2[['incidence_hc']],' (',any_nade_incidence2[['num_evt_hc']],')',sep=''),
                                              paste(any_mh_nade_no_sa_incidence2[['incidence_hc']],' (',any_mh_nade_no_sa_incidence2[['num_evt_hc']],')',sep=''),
                                              paste(death_incidence2[['incidence_hc']],' (',death_incidence2[['num_evt_hc']],')',sep='')),
                                'not_on_hc2'=c(paste(cardio_incidence2[['incidence_no_hc']],' (',cardio_incidence2[['num_evt_no_hc']],')',sep=''),
                                              paste(hepatic_incidence2[['incidence_no_hc']],' (',hepatic_incidence2[['num_evt_no_hc']],')',sep=''),
                                              paste(mh_incidence2[['incidence_no_hc']],' (',mh_incidence2[['num_evt_no_hc']],')',sep=''),
                                              paste(metabolic_incidence2[['incidence_no_hc']],' (',metabolic_incidence2[['num_evt_no_hc']],')',sep=''),
                                              paste(renal_incidence2[['incidence_no_hc']],' (',renal_incidence2[['num_evt_no_hc']],')',sep=''),
                                              paste(any_nade_incidence2[['incidence_no_hc']],' (',any_nade_incidence2[['num_evt_no_hc']],')',sep=''),
                                              paste(any_mh_nade_no_sa_incidence2[['incidence_no_hc']],' (',any_mh_nade_no_sa_incidence2[['num_evt_no_hc']],')',sep=''),
                                              paste(death_incidence2[['incidence_no_hc']],' (',death_incidence2[['num_evt_no_hc']],')',sep='')))

incidence_rate_df <- as.matrix(incidence_rate_df)
colnames(incidence_rate_df) <- c('NADE Category','On HC','Not on HC','On HC','Not on HC')


incidence_rate_df2 <- data.frame('NADE Category'=c('CARDIOVASCULAR OR CEREBROVASCULAR',
                                                  'HEPATIC',
                                                  'MENTAL HEALTH',
                                                  'METABOLIC',
                                                  'RENAL',
                                                  'Any NADE',
                                                  'Any NADE (including mental health)',
                                                  'Death'),
                                 'on_hc2'=c(paste(cardio_incidence2[['incidence_hc']],' (',cardio_incidence2[['num_evt_hc']],')',sep=''),
                                              paste(hepatic_incidence2[['incidence_hc']],' (',hepatic_incidence2[['num_evt_hc']],')',sep=''),
                                              paste(mh_incidence2[['incidence_hc']],' (',mh_incidence2[['num_evt_hc']],')',sep=''),
                                              paste(metabolic_incidence2[['incidence_hc']],' (',metabolic_incidence2[['num_evt_hc']],')',sep=''),
                                              paste(renal_incidence2[['incidence_hc']],' (',renal_incidence2[['num_evt_hc']],')',sep=''),
                                              paste(any_nade_incidence2[['incidence_hc']],' (',any_nade_incidence2[['num_evt_hc']],')',sep=''),
                                              paste(any_mh_nade_no_sa_incidence2[['incidence_hc']],' (',any_mh_nade_no_sa_incidence2[['num_evt_hc']],')',sep=''),
                                              paste(death_incidence2[['incidence_hc']],' (',death_incidence2[['num_evt_hc']],')',sep='')),
                                'not_on_hc2'=c(paste(cardio_incidence2[['incidence_no_hc']],' (',cardio_incidence2[['num_evt_no_hc']],')',sep=''),
                                              paste(hepatic_incidence2[['incidence_no_hc']],' (',hepatic_incidence2[['num_evt_no_hc']],')',sep=''),
                                              paste(mh_incidence2[['incidence_no_hc']],' (',mh_incidence2[['num_evt_no_hc']],')',sep=''),
                                              paste(metabolic_incidence2[['incidence_no_hc']],' (',metabolic_incidence2[['num_evt_no_hc']],')',sep=''),
                                              paste(renal_incidence2[['incidence_no_hc']],' (',renal_incidence2[['num_evt_no_hc']],')',sep=''),
                                              paste(any_nade_incidence2[['incidence_no_hc']],' (',any_nade_incidence2[['num_evt_no_hc']],')',sep=''),
                                              paste(any_mh_nade_no_sa_incidence2[['incidence_no_hc']],' (',any_mh_nade_no_sa_incidence2[['num_evt_no_hc']],')',sep=''),
                                              paste(death_incidence2[['incidence_no_hc']],' (',death_incidence2[['num_evt_no_hc']],')',sep='')))

incidence_rate_df2 <- as.matrix(incidence_rate_df2)
colnames(incidence_rate_df2) <- c('NADE Category','On HC','Not on HC')


# Calculating incidence from actual ages and not artifical ones from monthly data frame
mh_nade_long$age_after_year_gap <- sub_ages[match(mh_nade_long$cfar_pid,sub_ages$cfar_pid,nomatch=NA),'age_after_year_gap']
mh_nade_long$last_age <- time.df[match(mh_nade_long$cfar_pid,time.df$cfar_pid,nomatch=NA),'last_age']

jk <- subset(mh_nade_long, mh_nade_category != 'SUBSTANCE ABUSE' & cfar_pid %in% time.df$cfar_pid & age_mh_nade <= last_age & (is.na(age_after_year_gap) | age_mh_nade < age_after_year_gap))
jk$pid.age <- paste(jk$cfar_pid,jk$age_mh_nade,sep='.')
dup.ids <- unique(jk$pid.age[which(duplicated(jk$pid.age))])
jk$dup.ids <- 0
jk$dup.ids[which(jk$pid.age %in% dup.ids)] <- 1
tmp <- subset(jk, dup.ids == 1)
tmp <- tmp[order(tmp$cfar_pid, tmp$age_mh_nade),]
tmp$nade <- NA
split(tmp$nade,tmp$cfar_pid) <- lapply(split(tmp$cfar_pid,tmp$cfar_pid),FUN=function(y){seq(1,length(y))})
tmp.wide <- reshape(subset(tmp,select=c(cfar_pid,age_mh_nade,mh_nade_category,age_at_t0,last_age,pid.age,nade)), v.names='mh_nade_category',idvar='pid.age',timevar='nade',direction='wide')
names(tmp.wide)[6:7] <- c('nade_category_new','nade_category2_new')
tmp.wide$nade_category_new <- ifelse(tmp.wide$nade_category_new %in% c('CARDIOVASCULAR','CEREBROVASCULAR'),'CARDIOVASCULAR OR CEREBROVASCULAR',tmp.wide$nade_category_new)
tmp.wide$nade_category2_new <- ifelse(tmp.wide$nade_category2_new %in% c('CARDIOVASCULAR','CEREBROVASCULAR'),'CARDIOVASCULAR OR CEREBROVASCULAR',tmp.wide$nade_category2_new)
jk <- subset(jk, dup.ids == 0)
jk$nade_category_new <- ifelse(!is.na(jk$mh_nade_category) & jk$mh_nade_category %in% c('CARDIOVASCULAR','CEREBROVASCULAR'),'CARDIOVASCULAR OR CEREBROVASCULAR',jk$mh_nade_category)
jk.tmp <- merge(jk,tmp.wide,all=TRUE)


hcp_total$age_after_year_gap <- time_diff.df[match(hcp_total$cfar_pid,time_diff.df$cfar_pid,nomatch=NA),'age_after_year_gap']
hcp_total$last_age <- time.df[match(hcp_total$cfar_pid,time.df$cfar_pid,nomatch=NA),'last_age']
hcp_total$age_at_death <- time.df[match(hcp_total$cfar_pid,time.df$cfar_pid,nomatch=NA),'age_at_death']
hcp_total$last_age <- ifelse(!is.na(hcp_total$age_at_death) & (!is.na(hcp_total$age_after_year_gap) & hcp_total$age_at_death < hcp_total$age_after_year_gap | is.na(hcp_total$age_after_year_gap)), 
                             hcp_total$age_at_death,hcp_total$last_age)


hcp_total$hcp_diff <- apply(subset(hcp_total, select=c(age_at_hcp_start_date_modified, age_at_hcp_end_date,age_after_year_gap,last_age)),1,FUN=function(y){
               if(y['age_at_hcp_start_date_modified'] > y['last_age']){
                  y['age_at_hcp_start_date_modified'] <- y['age_at_hcp_end_date'] <- NA
               }
               if(!is.na(y['age_at_hcp_end_date']) & y['age_at_hcp_end_date'] > y['last_age']){
                  y['age_at_hcp_end_date'] <- y['last_age']
               }
               if(!is.na(y['age_after_year_gap']) & y['age_at_hcp_start_date_modified'] >= y['age_after_year_gap']){
                  y['age_at_hcp_start_date_modified'] <- y['age_at_hcp_end_date'] <- NA
               }
               if(!is.na(y['age_after_year_gap']) & !is.na(y['age_at_hcp_end_date']) & y['age_at_hcp_end_date'] >= y['age_after_year_gap']){
                  y['age_at_hcp_end_date'] <- y['last_age']
               } 
               hcp_diff <- y['age_at_hcp_end_date'] - y['age_at_hcp_start_date_modified']
               if(is.na(hcp_diff)){
                  hcp_diff <- 0
               }
               
               return(hcp_diff) })


hcp_total$cum_hcp <- NA
split(hcp_total$cum_hcp, hcp_total$cfar_pid) <- lapply(split(hcp_total$hcp_diff,hcp_total$cfar_pid),FUN=function(y){
                                                   cumsum(y) })
hcp_total$max_cum_hcp <- NA
split(hcp_total$max_cum_hcp,hcp_total$cfar_pid) <- lapply(split(hcp_total$cum_hcp,hcp_total$cfar_pid),FUN=function(y){
                                                      max(y)})

jk.tmp2 <- merge(jk.tmp,subset(hcp_total,select=c(cfar_pid,age_at_hcp_start_date_modified,age_at_hcp_end_date,max_cum_hcp,hcp_title_category)),all=TRUE) 
jk.tmp2$nade_on_hcp <- ifelse(jk.tmp2$age_mh_nade >= jk.tmp2$age_at_hcp_start_date_modified & jk.tmp2$age_mh_nade <= jk.tmp2$age_at_hcp_end_date,1,0)
hcp_jk.tmp2 <- subset(jk.tmp2, cfar_pid %in% hcp_total$cfar_pid,select=c(cfar_pid,nade_category_new,nade_category2_new,max_cum_hcp))


time.df$age_after_year_gap <- sub_ages[match(time.df$cfar_pid,sub_ages$cfar_pid,nomatch=NA),'age_after_year_gap']
nhcp_df <- subset(time.df, cfar_pid %in% unique(time.df$cfar_pid))
nhcp_df$last_age <- ifelse(!is.na(nhcp_df$age_at_death),nhcp_df$age_at_death,nhcp_df$last_age)
nhcp_df$max_cum_hcp <- hcp_total[match(nhcp_df$cfar_pid, hcp_total$cfar_pid,nomatch=NA),'max_cum_hcp']
nhcp_df$max_cum_hcp <- ifelse(is.na(nhcp_df$max_cum_hcp),0,nhcp_df$max_cum_hcp)
nhcp_df$max_cum_no_hcp <- with(nhcp_df, last_age - age_at_t0 - max_cum_hcp)

nhcp_df$tdf_max_cum_hcp <- time.df[match(nhcp_df$cfar_pid,time.df$cfar_pid,nomatch=NA),'max_cum_hcp']
nhcp_df$tdf_max_cum_no_hcp <- time.df[match(nhcp_df$cfar_pid,time.df$cfar_pid,nomatch=NA),'max_cum_no_hcp']
nhcp_df$tdf_cum_and_oth <- with(nhcp_df, tdf_max_cum_hcp - max_cum_hcp)
nhcp_df$tdf_cum_no_and_oth <- with(nhcp_df, tdf_max_cum_no_hcp - max_cum_no_hcp)

# Now adding in max_cum_no_hcp to jk.tmp2 for those IDs in both
jk.tmp2$max_cum_no_hcp <- nhcp_df[match(jk.tmp2$cfar_pid,nhcp_df$cfar_pid,nomatch=NA),'max_cum_no_hcp']
jk.tmp3 <- merge(jk.tmp2, subset(nhcp_df, cfar_pid %nin% jk.tmp2$cfar_pid & !duplicated(cfar_pid),
                                select=c(cfar_pid,age_at_t0,last_age,prior_mh_non_ade,prior_mh_non_ade_no_sa,prior_mh_non_ade_only,max_cum_hcp,max_cum_no_hcp)),all=TRUE)
jk.tmp3$nade_on_hcp <- ifelse(is.na(jk.tmp3$nade_on_hcp),0,jk.tmp3$nade_on_hcp)
jk.tmp3$max_cum_hcp <- ifelse(is.na(jk.tmp3$max_cum_hcp),0,jk.tmp3$max_cum_hcp)
jk.tmp3$max_cum_no_hcp <- ifelse(is.na(jk.tmp3$max_cum_no_hcp),0,jk.tmp3$max_cum_no_hcp)
jk.tmp3$age_at_death <- time.df[match(jk.tmp3$cfar_pid,time.df$cfar_pid,nomatch=NA),'age_at_death']
jk.tmp3$death_on_hcp <- ifelse(!is.na(jk.tmp3$age_at_death) & !is.na(jk.tmp3$age_at_hcp_start_date_modified) & jk.tmp3$age_at_death >= jk.tmp3$age_at_hcp_start_date_modified & 
                              jk.tmp3$age_at_death <= jk.tmp3$age_at_hcp_end_date,1,0)
# A few with missing last age so will match it in again
jk.tmp3$last_age <- time.df[match(jk.tmp3$cfar_pid, time.df$cfar_pid,nomatch=NA),'last_age']


incidence_function3 <- function(df,nade_of_interest){
   if('DEATH' %nin% nade_of_interest){
      df$nade_category_new <- ifelse(!is.na(df$nade_category_new) & df$nade_category_new %nin% nade_of_interest, NA, df$nade_category_new)
      df$nade_category2_new <- ifelse(!is.na(df$nade_category2_new) & df$nade_category2_new %nin% nade_of_interest, NA, df$nade_category2_new)
   
      evt_hc_df <- subset(df, !duplicated(pid.age) & !is.na(pid.age) & (nade_category_new %in% nade_of_interest | 
                                                                        nade_category2_new %in% nade_of_interest) & 
                              nade_on_hcp == 1)

                                                     
      num_evt_hc <- length(which(!is.na(evt_hc_df$nade_category_new) & evt_hc_df$nade_category_new %in% nade_of_interest)) + 
                    length(which(!is.na(evt_hc_df$nade_category2_new) & evt_hc_df$nade_category_new %in% nade_of_interest))

      fu_on_hc <- sum(df$max_cum_hcp[!duplicated(df$cfar_pid)])
      incidence_hc <- format(round(num_evt_hc/fu_on_hc*1000,2),nsmall=2)

   
      evt_no_hc_df <- subset(df, !duplicated(pid.age) & !is.na(pid.age) & (nade_category_new %in% nade_of_interest | 
                                                                           nade_category2_new %in% nade_of_interest) & 
                                nade_on_hcp == 0)

      num_evt_no_hc <- length(which(!is.na(evt_no_hc_df$nade_category_new) & evt_no_hc_df$nade_on_hcp == 0)) + 
                       length(which(!is.na(evt_no_hc_df$nade_category2_new) & evt_no_hc_df$nade_on_hcp == 0))
   
      fu_not_on_hc <- sum(df$max_cum_no_hcp[!duplicated(df$cfar_pid)])
      incidence_no_hc <- format(round(num_evt_no_hc/fu_not_on_hc*1000,2),nsmall=2)
   } else {
      num_evt_hc <- length(which(!is.na(df$age_at_death) & df$death_on_hcp == 1))
      fu_on_hc <- sum(df$max_cum_hcp[!duplicated(df$cfar_pid)])
      incidence_hc <- format(round(num_evt_hc/fu_on_hc*1000,2),nsmall=2)
      
      num_evt_no_hc <- length(which(!is.na(df$age_at_death[!duplicated(df$cfar_pid)]))) - num_evt_hc
      fu_not_on_hc <- sum(df$max_cum_no_hcp[!duplicated(df$cfar_pid)])
      incidence_no_hc <- format(round(num_evt_no_hc/fu_not_on_hc*1000,2),nsmall=2)
   }
   fu_on_hc <- round(fu_on_hc,2)
   fu_not_on_hc <- round(fu_not_on_hc,2)
   
   return(list(num_evt_hc = num_evt_hc,
               incidence_hc = incidence_hc,
               fu_on_hc = fu_on_hc,
               num_evt_no_hc = num_evt_no_hc,
               incidence_no_hc = incidence_no_hc,
               fu_not_on_hc = fu_not_on_hc))

}

cardio_incidence3 <- incidence_function3(df=jk.tmp3,nade_of_interest='CARDIOVASCULAR OR CEREBROVASCULAR')
hepatic_incidence3 <- incidence_function3(df=jk.tmp3,nade_of_interest='HEPATIC')
mh_incidence3 <- incidence_function3(df=jk.tmp3, nade_of_interest='MENTAL HEALTH')
metabolic_incidence3 <- incidence_function3(df=jk.tmp3, nade_of_interest='METABOLIC')
# malignancy_incidence3 <- incidence_function3(df=jk.tmp3, nade_of_interest='MALIGNANCY')
renal_incidence3 <- incidence_function3(df=jk.tmp3, nade_of_interest='RENAL')
any_nade_incidence3 <- incidence_function3(df=jk.tmp3, nade_of_interest=c('CARDIOVASCULAR OR CEREBROVASCULAR','HEPATIC','METABOLIC','MALIGNANCY','RENAL'))
any_mh_nade_no_sa_incidence3 <- incidence_function3(df=jk.tmp3, nade_of_interest=c('CARDIOVASCULAR OR CEREBROVASCULAR','HEPATIC','MENTAL HEALTH','METABOLIC','MALIGNANCY','RENAL'))
death_incidence3 <- incidence_function3(df=jk.tmp3, nade_of_interest='DEATH')

incidence_rate_df3 <- data.frame('NADE Category'=c('CARDIOVASCULAR OR CEREBROVASCULAR',
                                                  'HEPATIC',
                                                  'MENTAL HEALTH',
                                                  'METABOLIC',
                                                  'RENAL',
                                                  'Any NADE',
                                                  'Any NADE (including mental health)',
                                                  'Death'),
                                 'on_hc2'=c(paste(cardio_incidence3[['incidence_hc']],' (',cardio_incidence3[['num_evt_hc']],')',sep=''),
                                              paste(hepatic_incidence3[['incidence_hc']],' (',hepatic_incidence3[['num_evt_hc']],')',sep=''),
                                              paste(mh_incidence3[['incidence_hc']],' (',mh_incidence3[['num_evt_hc']],')',sep=''),
                                              paste(metabolic_incidence3[['incidence_hc']],' (',metabolic_incidence3[['num_evt_hc']],')',sep=''),
                                              paste(renal_incidence3[['incidence_hc']],' (',renal_incidence3[['num_evt_hc']],')',sep=''),
                                              paste(any_nade_incidence3[['incidence_hc']],' (',any_nade_incidence3[['num_evt_hc']],')',sep=''),
                                              paste(any_mh_nade_no_sa_incidence3[['incidence_hc']],' (',any_mh_nade_no_sa_incidence3[['num_evt_hc']],')',sep=''),
                                              paste(death_incidence3[['incidence_hc']],' (',death_incidence3[['num_evt_hc']],')',sep='')),
                                'not_on_hc2'=c(paste(cardio_incidence3[['incidence_no_hc']],' (',cardio_incidence3[['num_evt_no_hc']],')',sep=''),
                                              paste(hepatic_incidence3[['incidence_no_hc']],' (',hepatic_incidence3[['num_evt_no_hc']],')',sep=''),
                                              paste(mh_incidence3[['incidence_no_hc']],' (',mh_incidence3[['num_evt_no_hc']],')',sep=''),
                                              paste(metabolic_incidence3[['incidence_no_hc']],' (',metabolic_incidence3[['num_evt_no_hc']],')',sep=''),
                                              paste(renal_incidence3[['incidence_no_hc']],' (',renal_incidence3[['num_evt_no_hc']],')',sep=''),
                                              paste(any_nade_incidence3[['incidence_no_hc']],' (',any_nade_incidence3[['num_evt_no_hc']],')',sep=''),
                                              paste(any_mh_nade_no_sa_incidence3[['incidence_no_hc']],' (',any_mh_nade_no_sa_incidence3[['num_evt_no_hc']],')',sep=''),
                                              paste(death_incidence3[['incidence_no_hc']],' (',death_incidence3[['num_evt_no_hc']],')',sep='')))

incidence_rate_df3 <- as.matrix(incidence_rate_df3)
colnames(incidence_rate_df3) <- c('NADE Category','On HC','Not on HC')


# Creating table for Jessie that has psychiatric diagnoses based on HC exposure, not ever/never use
jk.tmp3$nade_on_hcp.factor <- factor(jk.tmp3$nade_on_hcp, levels=c(0,1), labels=c('On hormonal contraception','Not on hormonal contraception'))
jk.mh <- subset(jk.tmp3, nade_category_new == 'MENTAL HEALTH' | nade_category2_new == 'MENTAL HEALTH')
jk.mh$mh_nade <- jk.tmp[match(jk.mh$pid.age,jk.tmp$pid.age,nomatch=NA),'mh_nade']
jk.mh$mh_nade <- factor(jk.mh$mh_nade)

mh_summ. <- summary(nade_on_hcp.factor ~ mh_nade, data=jk.mh, subset=!duplicated(pid.age), method='reverse',overall=TRUE)


jk <- subset(mh_nade_long, mh_nade_category == 'MENTAL HEALTH' & cfar_pid %in% time.df$cfar_pid & age_mh_nade <= last_age & (is.na(age_after_year_gap) | age_mh_nade < age_after_year_gap))
jk$pid.age <- paste(jk$cfar_pid,jk$age_mh_nade,sep='.')
jk$nade_on_hcp <- jk.mh[match(jk$pid.age,jk.mh$pid.age,nomatch=NA),'nade_on_hcp']

sj <- jk[which(duplicated(jk$pid.age) & jk$mh_nade_category == 'MENTAL HEALTH'),c('cfar_pid','pid.age','mh_nade','mh_nade_category')]
sub_sj <- subset(jk, pid.age%in% sj$pid.age & !duplicated(pid.age) & jk$mh_nade_category == 'MENTAL HEALTH',select=c('cfar_pid','pid.age','mh_nade','mh_nade_category'))


sj$nade_on_hcp <- jk.mh[match(sj$pid.age,jk.mh$pid.age,nomatch=NA),'nade_on_hcp']
sub_sj$nade_on_hcp <- jk.mh[match(sub_sj$pid.age,jk.mh$pid.age,nomatch=NA),'nade_on_hcp']


sj2 <- rbind(sj, subset(jk, pid.age %nin% sj$pid.age & !duplicated(pid.age), select=c(cfar_pid,pid.age,mh_nade,mh_nade_category,nade_on_hcp)))
sj2 <- rbind(sj2,sub_sj)


sj2$nade_on_hcp.factor <- factor(sj2$nade_on_hcp, levels=c(0,1), labels=c('Not on hormonal contraception','On hormonal contraception'))
sj2$mh_nade <- factor(sj2$mh_nade)


mh_summ.2 <- summary(nade_on_hcp.factor ~ mh_nade, data=sj2,method='reverse',overall=TRUE)


# Using counts used in models, not total ones
no.nades <- paste('NADE  (',nade[['msm2']]$freq[2],')',sep='')
no.mh.nades_only <- paste('Only mental health NADEs  (',mh_nade_only.mod[['msm2']]$freq[2],')',sep='')
no.mh.nades <- paste('NADEs including mental health  (',mh_nade.mod[['msm2']]$freq[2],')',sep='')
no.mh.nades.no.sa <- paste('NADEs including mental health  (',mh_nade_no_sa.mod[['msm2']]$freq[2],')',sep='')

simple_outcomes <- data.frame('Covariate'=c('Baseline age (rcs)',
                                            '~~~35 vs 30',
                                            '~~~35 vs 40',
                                            '~~~35 vs 50',
                                            'Month (per 6 months)',
                                            'Cumulative HCP use (per year)',
                                            'Year of T0 (per 3 years)',
                                            'Baseline CD4 (per 100)',
                                            'Baseline log viral load (per unit)',
                                            'HC use prior to T0',
                                            'NADE prior to T0',
                                            'Race:  White',
                                            '~~~Non-white',
                                            'Route of infection:  Non-IDU',
                                            '~~~IDU',
                                            'On HCP',
                                            'HCV prior to T0'),
                              'nade_or'=c('',nade_or[1],nade_or2,nade_or3,nade_or[2:6],
                                          nade_or[7],nade_or[8],
                                          format(1.00,nsmall=2),nade_or[9],
                                          format(1.00,nsmall=2),nade_or[10],
                                          nade_or[11:12]),
                              'nade_ci'=c('',nade_ci[1],nade_ci2,nade_ci3,nade_ci[2:6],nade_ci[7],nade_ci[8],
                                          '',nade_ci[9],'',nade_ci[10:12]),
                              'nade_p'=c(nade_p[1],'','','',nade_p[2:9],'',nade_p[10],'',nade_p[11:12]),

                              'mh_nade_only_or'=c('',mh_nade_only_or[1],mh_nade_only_or2,mh_nade_only_or3,
                                                  mh_nade_only_or[2:6],mh_nade_only_or[7],mh_nade_only_or[8],
                                          format(1.00,nsmall=2),mh_nade_only_or[9],format(1.00,nsmall=2),mh_nade_only_or[10],
                                          mh_nade_only_or[11:12]),
                              'mh_nade_only_ci'=c('',mh_nade_only_ci[1],mh_nade_only_ci2,mh_nade_only_ci3,
                                                  mh_nade_only_ci[2:6],mh_nade_only_ci[7],mh_nade_only_ci[8],
                                          '',mh_nade_only_ci[9],'',mh_nade_only_ci[10:12]),
                              'mh_nade_only_p'=c(mh_nade_only_p[1],'','','',mh_nade_only_p[2:9],'',mh_nade_only_p[10],'',mh_nade_only_p[11:12]),
                              'mh_nade_no_sa_or'=c('',mh_nade_no_sa_or[1],mh_nade_no_sa_or2,mh_nade_no_sa_or3,mh_nade_no_sa_or[2:6],
                                          mh_nade_no_sa_or[7],mh_nade_no_sa_or[8],
                                          format(1.00,nsmall=2),mh_nade_no_sa_or[9],
                                          format(1.00,nsmall=2),mh_nade_no_sa_or[10],
                                          mh_nade_no_sa_or[11:12]),
                              'mh_nade_no_sa_ci'=c('',mh_nade_no_sa_ci[1],mh_nade_no_sa_ci2,mh_nade_no_sa_ci3,mh_nade_no_sa_ci[2:6],mh_nade_no_sa_ci[7],mh_nade_no_sa_ci[8],
                                          '',mh_nade_no_sa_ci[9],'',mh_nade_no_sa_ci[10:12]),
                              'mh_nade_no_sa_p'=c(mh_nade_no_sa_p[1],'','','',mh_nade_no_sa_p[2:9],'',mh_nade_no_sa_p[10],'',mh_nade_no_sa_p[11:12]))
simple_outcomes <- as.matrix(simple_outcomes)
colnames(simple_outcomes) <- c('Covariate',rep(c('OR','95\\% CI','P'),times=3))


no.nade.dth <- paste('NADE/Death  (',nade_dth[['msm2']]$freq[2],')',sep='')
no.mh.nade_only.dth <- paste('Mental health NADEs/Death  (',mh_nade_only_dth[['msm2']]$freq[2],')',sep='')
no.mh.nade.dth <- paste('NADE, including all mental health ones  (',mh_nade_dth[['msm2']]$freq[2],')',sep='')
no.mh.nade.no.sa.dth <- paste('NADE, including mental health/Death  (',mh_nade_no_sa_dth[['msm2']]$freq[2],')',sep='')

compound_outcomes <- data.frame('Covariate'=c('Baseline age (rcs)',
                                            '~~~35 vs 30',
                                            '~~~35 vs 40',
                                            '~~~35 vs 50',
                                            'Month (per 6 months)',
                                            'Cumulative HCP use (per year)',
                                            'Year of T0 (per 3 years)',
                                            'Baseline CD4 (per 100)',
                                            'Baseline log viral load (per unit)',
                                            'HC use prior to T0',
                                            'NADE prior to T0',
                                            'Race:  White',
                                            '~~~Non-white',
                                            'Route of infection:  Non-IDU',
                                            '~~~IDU',
                                            'On HCP',
                                            'HCV prior to T0'),
                              'nade_dth_or'=c('',nade_dth_or[1],nade_dth_or2,nade_dth_or3,nade_dth_or[2:6],
                                          nade_dth_or[7],nade_dth_or[8],
                                          format(1.00,nsmall=2),nade_dth_or[9],
                                          format(1.00,nsmall=2),nade_dth_or[10],
                                          nade_dth_or[11:12]),
                              'nade_ci'=c('',nade_dth_ci[1],nade_dth_ci2,nade_dth_ci3,nade_dth_ci[2:6],nade_dth_ci[7],nade_dth_ci[8],
                                          '',nade_dth_ci[9],'',nade_dth_ci[10:12]),
                              'nade_p'=c(nade_dth_p[1],'','','',nade_dth_p[2:9],'',nade_dth_p[10],'',nade_dth_p[11:12]),

                              'mh_nade_only_or'=c('',mh_nade_only_dth_or[1],mh_nade_only_dth_or2,mh_nade_only_dth_or3,
                                                  mh_nade_only_dth_or[2:6],mh_nade_only_dth_or[7],mh_nade_only_dth_or[8],
                                          format(1.00,nsmall=2),mh_nade_only_dth_or[9],format(1.00,nsmall=2),mh_nade_only_dth_or[10],
                                          mh_nade_only_dth_or[11:12]),
                              'mh_nade_only_ci'=c('',mh_nade_only_dth_ci[1],mh_nade_only_dth_ci2,mh_nade_only_dth_ci3,mh_nade_only_dth_ci[2:6],
                                                  mh_nade_only_dth_ci[7],mh_nade_only_dth_ci[8],'',
                                                  mh_nade_only_dth_ci[9],'',mh_nade_only_dth_ci[10:12]),
                              'mh_nade_only_p'=c(mh_nade_only_dth_p[1],'','','',mh_nade_only_dth_p[2:9],'',mh_nade_only_dth_p[10],'',
                                                 mh_nade_only_dth_p[11:12]),
                              'mh_nade_no_sa_or'=c('',mh_nade_no_sa_dth_or[1],mh_nade_no_sa_dth_or2,mh_nade_no_sa_dth_or3,mh_nade_no_sa_dth_or[2:6],
                                          mh_nade_no_sa_dth_or[7],mh_nade_no_sa_dth_or[8],
                                          format(1.00,nsmall=2),mh_nade_no_sa_dth_or[9],
                                          format(1.00,nsmall=2),mh_nade_no_sa_dth_or[10],
                                          mh_nade_no_sa_dth_or[11:12]),
                              'mh_nade_no_sa_ci'=c('',mh_nade_no_sa_dth_ci[1],mh_nade_no_sa_dth_ci2,mh_nade_no_sa_dth_ci3,mh_nade_no_sa_dth_ci[2:6],mh_nade_no_sa_dth_ci[7],
                                             mh_nade_no_sa_dth_ci[8],
                                          '',mh_nade_no_sa_dth_ci[9],'',mh_nade_no_sa_dth_ci[10:12]),
                              'mh_nade_no_sa_p'=c(mh_nade_no_sa_dth_p[1],'','','',mh_nade_no_sa_dth_p[2:9],'',mh_nade_no_sa_dth_p[10],'',mh_nade_no_sa_dth_p[11:12]))
compound_outcomes <- as.matrix(compound_outcomes)
colnames(compound_outcomes) <- c('Covariate',rep(c('OR','95\\% CI','P'),times=3))


# Just death as outcome
no.of.deaths <- table(df$death_outcome_indicator)[2]
death_outcome <- data.frame('Covariate'=c('Cumulative HCP use (per year)','On HCP'),
                            'OR'=death_simple_or,
                            'CI'=death_simple_ci,
                            'P'=death_simple_p)


# How many women received HC at any point during followup
number.w.hc <- tapply(time.df$hcp_ind, INDEX=time.df$cfar_pid, FUN=function(y){as.numeric(any(y == 1))})


no.nade.depo <- paste('NADE  (',nade.depo[['msm2']]$freq[2],')',sep='')
no.mh_nade_only.depo <- paste('Mental health NADEs only  (',mh_nade_only.depo.mod[['msm2']]$freq[2],')',sep='')
no.mh_nade.depo <- paste('NADEs, including mental health ones  (',mh_nade.depo.mod[['msm2']]$freq[2],')',sep='')
no.mh_nade_no_sa.depo <- paste('NADEs, including mental health ones  (',mh_nade_no_sa.depo.mod[['msm2']]$freq[2],')',sep='')

simple_depo_outcomes <- data.frame('Covariate'=c('Baseline age (rcs)',
                                            '~~~35 vs 30',
                                            '~~~35 vs 40',
                                            '~~~35 vs 50',
                                            'Month (per 6 months)',
                                            'Cumulative Depo use (per year)',
                                            'Year of T0 (per 3 years)',
                                            'Baseline CD4 (per 100)',
                                            'Baseline log viral load (per unit)',
                                            'HC use prior to T0',
                                            'NADE prior to T0',
                                            'Race:  White',
                                            '~~~Non-white',
                                            'Route of infection:  Non-IDU',
                                            '~~~IDU',
                                            'On Depo',
                                            'HCV prior to T0'),
                              'nade_depo_or'=c('',nade_depo_or[1],nade_depo_or2,nade_depo_or3,nade_depo_or[2:6],
                                          nade_depo_or[7],nade_depo_or[8],
                                          format(1.00,nsmall=2),nade_depo_or[9],
                                          format(1.00,nsmall=2),nade_depo_or[10],
                                          nade_depo_or[11:12]),
                              'nade_depo_ci'=c('',nade_depo_ci[1],nade_depo_ci2,nade_depo_ci3,nade_depo_ci[2:6],nade_depo_ci[7],nade_depo_ci[8],
                                          '',nade_depo_ci[9],'',nade_depo_ci[10:12]),
                              'nade_depo_p'=c(nade_depo_p[1],'','','',nade_depo_p[2:9],'',nade_depo_p[10],'',nade_depo_p[11:12]),

                              'mh_nade_only_depo_or'=c('',mh_nade_only_depo_or[1],mh_nade_only_depo_or2,
                                                       mh_nade_only_depo_or3,mh_nade_only_depo_or[2:6],
                                          mh_nade_only_depo_or[7],mh_nade_only_depo_or[8],
                                          format(1.00,nsmall=2),mh_nade_only_depo_or[9],
                                          format(1.00,nsmall=2),mh_nade_only_depo_or[10],
                                          mh_nade_only_depo_or[11:12]),
                              'mh_nade_only_depo_ci'=c('',mh_nade_only_depo_ci[1],mh_nade_only_depo_ci2,mh_nade_only_depo_ci3,
                                                       mh_nade_only_depo_ci[2:6],mh_nade_only_depo_ci[7],
                                             mh_nade_only_depo_ci[8],'',mh_nade_only_depo_ci[9],'',mh_nade_only_depo_ci[10:12]),
                              'mh_nade_only_depo_p'=c(mh_nade_only_depo_p[1],'','','',mh_nade_only_depo_p[2:9],'',mh_nade_only_depo_p[10],
                                                      '',mh_nade_only_depo_p[11:12]),
                              'mh_nade_no_sa_depo_or'=c('',mh_nade_no_sa_depo_or[1],mh_nade_no_sa_depo_or2,mh_nade_no_sa_depo_or3,mh_nade_no_sa_depo_or[2:6],
                                          mh_nade_no_sa_depo_or[7],mh_nade_no_sa_depo_or[8],
                                          format(1.00,nsmall=2),mh_nade_no_sa_depo_or[9],
                                          format(1.00,nsmall=2),mh_nade_no_sa_depo_or[10],
                                          mh_nade_no_sa_depo_or[11:12]),
                              'mh_nade_no_sa_depo_ci'=c('',mh_nade_no_sa_depo_ci[1],mh_nade_no_sa_depo_ci2,mh_nade_no_sa_depo_ci3,mh_nade_no_sa_depo_ci[2:6],mh_nade_no_sa_depo_ci[7],
                                             mh_nade_no_sa_depo_ci[8],
                                          '',mh_nade_no_sa_depo_ci[9],'',mh_nade_no_sa_depo_ci[10:12]),
                              'mh_nade_no_sa_depo_p'=c(mh_nade_no_sa_depo_p[1],'','','',mh_nade_no_sa_depo_p[2:9],'',mh_nade_no_sa_depo_p[10],'',mh_nade_no_sa_depo_p[11:12]))

simple_depo_outcomes <- as.matrix(simple_depo_outcomes)
colnames(simple_depo_outcomes) <- c('Covariate',rep(c('OR','95\\% CI','P'),times=3))

no.nade_dth.depo <- paste('NADE/Death  (',nade_dth_depo[['msm2']]$freq[2],')',sep='')
no.mh_nade_only_dth.depo <- paste('Mental health NADEs/Death  (',mh_nade_only_dth_depo[['msm2']]$freq[2],')',sep='')
no.mh_mh_nade_dth.depo <- paste('NADE, including mental health/Death  (',mh_nade_dth_depo[['msm2']]$freq[2],')',sep='')
no.mh_nade_no_sa_dth.depo <- paste('NADE, including mental health/Death  (',mh_nade_no_sa_dth_depo[['msm2']]$freq[2],')',sep='')

compound_depo_outcomes <- data.frame('Covariate'=c('Baseline age (rcs)',
                                            '~~~35 vs 30',
                                            '~~~35 vs 40',
                                            '~~~35 vs 50',
                                            'Month (per 6 months)',
                                            'Cumulative Depo use (per year)',
                                            'Year of T0 (per 3 years)',
                                            'Baseline CD4 (per 100)',
                                            'Baseline log viral load (per unit)',
                                            'HC use prior to T0',
                                            'NADE prior to T0',
                                            'Race:  White',
                                            '~~~Non-white',
                                            'Route of infection:  Non-IDU',
                                            '~~~IDU',
                                            'On Depo',
                                            'HCV prior to T0'),
                              'nade_dth_depo_or'=c('',nade_dth_depo_or[1],nade_dth_depo_or2,nade_dth_depo_or3,
                                                   nade_dth_depo_or[2:6],
                                          nade_dth_depo_or[7],nade_dth_depo_or[8],
                                          format(1.00,nsmall=2),nade_dth_depo_or[9],
                                          format(1.00,nsmall=2),nade_dth_depo_or[10],
                                          nade_dth_depo_or[11:12]),
                              'nade_dth_depo_ci'=c('',nade_dth_depo_ci[1],nade_dth_depo_ci2,nade_dth_depo_ci3,nade_dth_depo_ci[2:6],
                                               nade_dth_depo_ci[7],nade_dth_depo_ci[8],'',nade_dth_depo_ci[9],'',nade_dth_depo_ci[10:12]),
                              'nade_dth_depo_p'=c(nade_dth_depo_p[1],'','','',nade_dth_depo_p[2:9],'',nade_dth_depo_p[10],'',
                                                  nade_dth_depo_p[11:12]),

                              'mh_nade_only_dth_depo_or'=c('',mh_nade_only_dth_depo_or[1],mh_nade_only_dth_depo_or2,
                                                           mh_nade_only_dth_depo_or3,mh_nade_only_dth_depo_or[2:6],
                                          mh_nade_only_dth_depo_or[7],mh_nade_only_dth_depo_or[8],
                                          format(1.00,nsmall=2),mh_nade_only_dth_depo_or[9],
                                          format(1.00,nsmall=2),mh_nade_only_dth_depo_or[10],mh_nade_only_dth_depo_or[11:12]),
                              'mh_nade_only_dth_depo_ci'=c('',mh_nade_only_dth_depo_ci[1],mh_nade_only_dth_depo_ci2,mh_nade_only_dth_depo_ci3,
                                                           mh_nade_only_dth_depo_ci[2:6],mh_nade_only_dth_depo_ci[7],
                                                           mh_nade_only_dth_depo_ci[8],'',mh_nade_only_dth_depo_ci[9],'',
                                                           mh_nade_only_dth_depo_ci[10:12]),
                              'mh_nade_only_dth_depo_p'=c(mh_nade_only_dth_depo_p[1],'','','',mh_nade_only_dth_depo_p[2:9],'',
                                                          mh_nade_only_dth_depo_p[10],'',mh_nade_only_dth_depo_p[11:12]),
                              'mh_nade_no_sa_dth_depo_or'=c('',mh_nade_no_sa_dth_depo_or[1],mh_nade_no_sa_dth_depo_or2,mh_nade_no_sa_dth_depo_or3,
                                             mh_nade_no_sa_dth_depo_or[2:6],
                                          mh_nade_no_sa_dth_depo_or[7],mh_nade_no_sa_dth_depo_or[8],
                                          format(1.00,nsmall=2),mh_nade_no_sa_dth_depo_or[9],
                                          format(1.00,nsmall=2),mh_nade_no_sa_dth_depo_or[10],
                                          mh_nade_no_sa_dth_depo_or[11:12]),
                              'mh_nade_no_sa_dth_depo_ci'=c('',mh_nade_no_sa_dth_depo_ci[1],mh_nade_no_sa_dth_depo_ci2,mh_nade_no_sa_dth_depo_ci3,mh_nade_no_sa_dth_depo_ci[2:6],
                                                  mh_nade_no_sa_dth_depo_ci[7],mh_nade_no_sa_dth_depo_ci[8],
                                          '',mh_nade_no_sa_dth_depo_ci[9],'',mh_nade_no_sa_dth_depo_ci[10:12]),
                              'mh_nade_no_sa_dth_depo_p'=c(mh_nade_no_sa_dth_depo_p[1],'','','',mh_nade_no_sa_dth_depo_p[2:9],'',mh_nade_no_sa_dth_depo_p[10],'',
                                                 mh_nade_no_sa_dth_depo_p[11:12]))

compound_depo_outcomes <- as.matrix(compound_depo_outcomes)
colnames(compound_depo_outcomes) <- c('Covariate',rep(c('OR','95\\% CI','P'),times=3))

# Just death as outcome
death_depo_outcome <- data.frame('Covariate'=c('Cumulative Depo use (per year)','On Depo'),
                            'OR'=death_simple_depo_or,
                            'CI'=death_simple_depo_ci,
                            'P'=death_simple_depo_p)


cj <- subset(time.df, !duplicated(cfar_pid))
cj$for_exposure_time <- with(cj, last_age - age_at_t0)
sum(cj$for_exposure_time[which(cj$any_hc_use.factor == 'No HC use')])
sum(cj$for_exposure_time[which(cj$any_hc_use.factor == 'HC use')])