#--------------------------------# # Loading libraries # #--------------------------------# rm(list=ls()) options(width=180) library(foreign) library(Hmisc) library(rms) #--------------------------------# # Functions # #--------------------------------# source('mylatex.summary.formula.reverse.R') #--------------------------------# # Loading data # #--------------------------------# mh_nade <- read.dta('deid_nades_prior_mh_01may2013.dta',convert.factors=FALSE) names(mh_nade) <- tolower(names(mh_nade)) names(mh_nade) <- c('age_at_mh_nade_after_t0_date1','age_at_mh_nade_after_t0_date2', 'age_at_mh_nade_after_t0_date3','age_at_mh_nade_after_t0_date4', 'age_at_mh_nade_after_t0_date5','age_at_mh_nade_after_t0_date6', 'cfar_pid','mh_non_ade_after_t01','mh_non_ade_after_t02', 'mh_non_ade_after_t03','mh_non_ade_after_t04', 'mh_non_ade_after_t05','mh_non_ade_after_t06', 'mh_non_ade_category1','mh_non_ade_category2', 'mh_non_ade_category3','mh_non_ade_category4', 'mh_non_ade_category5','mh_non_ade_category6', 'prior_mh_non_ade','prior_mh_non_ade_no_sa', 'prior_mh_non_ade_only') # Hard coding correction for 993837F -- oth_nade shows they had a NADE. mh_nade shows it occurred in the type but # not in the ages mh_nade$age_at_mh_nade_after_t0_date1 <- ifelse(mh_nade$cfar_pid == '993837F',31.3977, mh_nade$age_at_mh_nade_after_t0_date1) # Need to remove instances of NON-HODGKIN'S LYMPHOMA, LARGE CELL and NON-HODGKIN'S LYMPHOMA, UNSPECIFIED which.nhl1 <- which(mh_nade$mh_non_ade_after_t01 %in% c("NON-HODGKIN'S LYMPHOMA, LARGE CELL", "NON-HODGKIN'S LYMPHOMA, UNSPECIFIED")) which.nhl2 <- which(mh_nade$mh_non_ade_after_t02 %in% c("NON-HODGKIN'S LYMPHOMA, LARGE CELL", "NON-HODGKIN'S LYMPHOMA, UNSPECIFIED")) which.nhl3 <- which(mh_nade$mh_non_ade_after_t03 %in% c("NON-HODGKIN'S LYMPHOMA, LARGE CELL", "NON-HODGKIN'S LYMPHOMA, UNSPECIFIED")) which.nhl4 <- which(mh_nade$mh_non_ade_after_t04 %in% c("NON-HODGKIN'S LYMPHOMA, LARGE CELL", "NON-HODGKIN'S LYMPHOMA, UNSPECIFIED")) which.nhl5 <- which(mh_nade$mh_non_ade_after_t05 %in% c("NON-HODGKIN'S LYMPHOMA, LARGE CELL", "NON-HODGKIN'S LYMPHOMA, UNSPECIFIED")) which.nhl6 <- which(mh_nade$mh_non_ade_after_t06 %in% c("NON-HODGKIN'S LYMPHOMA, LARGE CELL", "NON-HODGKIN'S LYMPHOMA, UNSPECIFIED")) # Moving those with these diagnoses mh_nade$mh_non_ade_after_t01[which.nhl1] <- mh_nade$age_at_mh_nade_after_t0_date1[which.nhl1] <- mh_nade$mh_non_ade_category1[which.nhl1] <- NA mh_nade$mh_non_ade_after_t01[which.nhl1] <- mh_nade$mh_non_ade_after_t02[which.nhl1] mh_nade$mh_non_ade_after_t02[which.nhl1] <- mh_nade$mh_non_ade_after_t03[which.nhl1] mh_nade$mh_non_ade_after_t03[which.nhl1] <- mh_nade$mh_non_ade_after_t04[which.nhl1] mh_nade$mh_non_ade_after_t04[which.nhl1] <- mh_nade$mh_non_ade_after_t05[which.nhl1] mh_nade$mh_non_ade_after_t05[which.nhl1] <- mh_nade$mh_non_ade_after_t06[which.nhl1] mh_nade$age_at_mh_nade_after_t0_date1[which.nhl1] <- mh_nade$age_at_mh_nade_after_t0_date2[which.nhl1] mh_nade$age_at_mh_nade_after_t0_date2[which.nhl1] <- mh_nade$age_at_mh_nade_after_t0_date3[which.nhl1] mh_nade$age_at_mh_nade_after_t0_date3[which.nhl1] <- mh_nade$age_at_mh_nade_after_t0_date4[which.nhl1] mh_nade$age_at_mh_nade_after_t0_date4[which.nhl1] <- mh_nade$age_at_mh_nade_after_t0_date5[which.nhl1] mh_nade$age_at_mh_nade_after_t0_date5[which.nhl1] <- mh_nade$age_at_mh_nade_after_t0_date6[which.nhl1] mh_nade$mh_non_ade_category1[which.nhl1] <- mh_nade$mh_non_ade_category2[which.nhl1] mh_nade$mh_non_ade_category2[which.nhl1] <- mh_nade$mh_non_ade_category3[which.nhl1] mh_nade$mh_non_ade_category3[which.nhl1] <- mh_nade$mh_non_ade_category4[which.nhl1] mh_nade$mh_non_ade_category4[which.nhl1] <- mh_nade$mh_non_ade_category5[which.nhl1] mh_nade$mh_non_ade_category5[which.nhl1] <- mh_nade$mh_non_ade_category6[which.nhl1] # And for nhl2 mh_nade$age_at_mh_nade_after_t0_date2[which.nhl2] <- mh_nade$mh_non_ade_category2[which.nhl2] <- NA mh_nade$mh_non_ade_after_t02[which.nhl2] <- mh_nade$mh_non_ade_after_t03[which.nhl2] mh_nade$mh_non_ade_after_t03[which.nhl2] <- mh_nade$mh_non_ade_after_t04[which.nhl2] mh_nade$mh_non_ade_after_t04[which.nhl2] <- mh_nade$mh_non_ade_after_t05[which.nhl2] mh_nade$mh_non_ade_after_t05[which.nhl2] <- mh_nade$mh_non_ade_after_t06[which.nhl2] mh_nade$age_at_mh_nade_after_t0_date2[which.nhl2] <- mh_nade$age_at_mh_nade_after_t0_date3[which.nhl2] mh_nade$age_at_mh_nade_after_t0_date3[which.nhl2] <- mh_nade$age_at_mh_nade_after_t0_date4[which.nhl2] mh_nade$age_at_mh_nade_after_t0_date4[which.nhl2] <- mh_nade$age_at_mh_nade_after_t0_date5[which.nhl2] mh_nade$age_at_mh_nade_after_t0_date5[which.nhl2] <- mh_nade$age_at_mh_nade_after_t0_date6[which.nhl2] mh_nade$mh_non_ade_category2[which.nhl2] <- mh_nade$mh_non_ade_category3[which.nhl2] mh_nade$mh_non_ade_category3[which.nhl2] <- mh_nade$mh_non_ade_category4[which.nhl2] mh_nade$mh_non_ade_category4[which.nhl2] <- mh_nade$mh_non_ade_category5[which.nhl2] mh_nade$mh_non_ade_category5[which.nhl2] <- mh_nade$mh_non_ade_category6[which.nhl2] oth_nade <- read.dta('deid_nades_no_mh_subst_26mar12.dta',convert.factors=FALSE) names(oth_nade) <- tolower(names(oth_nade)) which.nhl1 <- which(oth_nade$non_ade_after_t01 %in% c("NON-HODGKIN'S LYMPHOMA, LARGE CELL", "NON-HODGKIN'S LYMPHOMA, UNSPECIFIED")) which.nhl2 <- which(oth_nade$non_ade_after_t02 %in% c("NON-HODGKIN'S LYMPHOMA, LARGE CELL", "NON-HODGKIN'S LYMPHOMA, UNSPECIFIED")) which.nhl3 <- which(oth_nade$non_ade_after_t03 %in% c("NON-HODGKIN'S LYMPHOMA, LARGE CELL", "NON-HODGKIN'S LYMPHOMA, UNSPECIFIED")) which.nhl4 <- which(oth_nade$non_ade_after_t04 %in% c("NON-HODGKIN'S LYMPHOMA, LARGE CELL", "NON-HODGKIN'S LYMPHOMA, UNSPECIFIED")) which.nhl5 <- which(oth_nade$non_ade_after_t05 %in% c("NON-HODGKIN'S LYMPHOMA, LARGE CELL", "NON-HODGKIN'S LYMPHOMA, UNSPECIFIED")) which.nhl6 <- which(oth_nade$non_ade_after_t06 %in% c("NON-HODGKIN'S LYMPHOMA, LARGE CELL", "NON-HODGKIN'S LYMPHOMA, UNSPECIFIED")) # Moving those with these diagnoses oth_nade$non_ade_after_t01[which.nhl1] <- oth_nade$age_at_nade_after_t0_date1[which.nhl1] <- oth_nade$non_ade_category1[which.nhl1] <- NA oth_nade$non_ade_after_t01[which.nhl1] <- oth_nade$non_ade_after_t02[which.nhl1] oth_nade$non_ade_after_t02[which.nhl1] <- oth_nade$non_ade_after_t03[which.nhl1] oth_nade$non_ade_after_t03[which.nhl1] <- oth_nade$non_ade_after_t04[which.nhl1] oth_nade$age_at_nade_after_t0_date1[which.nhl1] <- oth_nade$age_at_nade_after_t0_date2[which.nhl1] oth_nade$age_at_nade_after_t0_date2[which.nhl1] <- oth_nade$age_at_nade_after_t0_date3[which.nhl1] oth_nade$age_at_nade_after_t0_date3[which.nhl1] <- oth_nade$age_at_nade_after_t0_date4[which.nhl1] oth_nade$non_ade_category1[which.nhl1] <- oth_nade$non_ade_category2[which.nhl1] oth_nade$non_ade_category2[which.nhl1] <- oth_nade$non_ade_category3[which.nhl1] oth_nade$non_ade_category3[which.nhl1] <- oth_nade$non_ade_category4[which.nhl1] # And for nhl2 oth_nade$age_at_nade_after_t0_date2[which.nhl2] <- oth_nade$non_ade_category2[which.nhl2] <- NA oth_nade$non_ade_after_t02[which.nhl2] <- oth_nade$non_ade_after_t03[which.nhl2] oth_nade$non_ade_after_t03[which.nhl2] <- oth_nade$non_ade_after_t04[which.nhl2] oth_nade$age_at_nade_after_t0_date2[which.nhl2] <- oth_nade$age_at_nade_after_t0_date3[which.nhl2] oth_nade$age_at_nade_after_t0_date3[which.nhl2] <- oth_nade$age_at_nade_after_t0_date4[which.nhl2] oth_nade$non_ade_category2[which.nhl2] <- oth_nade$non_ade_category3[which.nhl2] oth_nade$non_ade_category3[which.nhl2] <- oth_nade$non_ade_category4[which.nhl2] oth_nade$non_ade_category2 <- ifelse(oth_nade$non_ade_category2 %in% c('','MENTAL HEALTH'),NA,oth_nade$non_ade_category2) oth_nade$age_at_nade_after_t0_date2 <- ifelse(is.na(oth_nade$non_ade_category2),NA,oth_nade$age_at_nade_after_t0_date2) oth_nade$non_ade_after_t02 <- ifelse(is.na(oth_nade$non_ade_category2),NA,oth_nade$non_ade_after_t02) oth_nade$non_ade_category1 <- ifelse(oth_nade$non_ade_category1 == '',NA,oth_nade$non_ade_category1) oth_nade$non_ade_category3 <- ifelse(oth_nade$non_ade_category3 == '',NA,oth_nade$non_ade_category3) oth_nade$non_ade_category4 <- ifelse(oth_nade$non_ade_category4 == '',NA,oth_nade$non_ade_category4) data <- read.dta('hormonal_contr_deid_07jun2012.dta',convert.factors=FALSE) names(data) <- tolower(names(data)) # Need to reset to be 399 if below that data$baseline_vl <- ifelse(!is.na(data$baseline_vl) & data$baseline_vl < 399, 399, data$baseline_vl) # ------------------------------------------------------------------ # # Creating long version of data frame # # ------------------------------------------------------------------ # # NADEs excluding mental health NADEs params <- grep('age_at_nade_after_t0_date', names(oth_nade), value=TRUE) oth_nade_long <- na.omit(reshape(oth_nade, params, v.names='age_nade',timevar='nade',idvar='cfar_pid', direction='long',sep=',',drop=setdiff(names(data),c('cfar_pid',params)))) oth_nade_long$age_at_t0 <- data[match(oth_nade_long$cfar_pid,data$cfar_pid,nomatch=NA),'age_at_t0'] nade_outcome <- lapply(split(subset(oth_nade_long, select=c(cfar_pid,age_nade,age_at_t0)),oth_nade_long$cfar_pid),FUN=function(y){ if(any(!is.na(y[,2])) & any(y[which(!is.na(y[,2])),2] > y[which(!is.na(y[,2])),3])){ nade <- 1 } else { nade <- 0 } if(nade == 1){ age_at_first_nade <- min(y[,2],na.rm=TRUE) } else { age_at_first_nade <- NA } df <- data.frame('cfar_pid'=y[1,1], 'nade_outcome'=nade, 'age_at_first_nade'=age_at_first_nade) return(df)}) nade_outcome.df <- do.call('rbind',nade_outcome) # NADEs including mental health NADEs params <- grep('age_at_mh_nade_after_t0_date', names(mh_nade), value=TRUE) params2 <- grep('^mh_non_ade_after_t0',names(mh_nade),value=TRUE) params3 <- grep('mh_non_ade_category',names(mh_nade),value=TRUE) mh_nade_long <- na.omit(reshape(mh_nade, list(params, params2,params3), v.names=c('age_mh_nade','mh_nade','mh_nade_category'),timevar='time_mh_nade',idvar='cfar_pid', direction='long',sep=',',drop=setdiff(names(data),c('cfar_pid',params,params2)))) mh_nade_long$age_at_t0 <- data[match(mh_nade_long$cfar_pid,data$cfar_pid,nomatch=NA),'age_at_t0'] mh_nade_outcome <- lapply(split(subset(mh_nade_long, select=c(cfar_pid,age_mh_nade,age_at_t0)),mh_nade_long$cfar_pid),FUN=function(y){ if(any(!is.na(y[,2])) & any(y[which(!is.na(y[,2])),2] > y[which(!is.na(y[,2])),3])){ mh_nade <- 1 } else { mh_nade <- 0 } if(mh_nade == 1){ age_at_first_mh_nade <- min(y[,2],na.rm=TRUE) } else { age_at_first_mh_nade <- NA } df <- data.frame('cfar_pid'=y[1,1], 'mh_nade_outcome'=mh_nade, 'age_at_first_mh_nade'=age_at_first_mh_nade) return(df)}) mh_nade_outcome.df <- do.call('rbind',mh_nade_outcome) # Want an analysis of just the mental health NADEs mh_nade_only <- subset(mh_nade_long, mh_nade_category == 'MENTAL HEALTH') mh_nade_only$pid.age <- paste(mh_nade_only$cfar_pid,mh_nade_only$age,sep='.') # Want an analysis of just the mental health NADEs mh_nade_no_sa <- subset(mh_nade_long, mh_nade_category != 'SUBSTANCE ABUSE') mh_nade_no_sa$pid.age <- paste(mh_nade_no_sa$cfar_pid,mh_nade_no_sa$age,sep='.') #---------------------------# params <- grep('age_at_',names(data),value=TRUE) ages <- na.omit(reshape(data, params, v.names='age',timevar='visit',idvar='cfar_pid',direction='long',sep=',', drop=setdiff(names(data),c('cfar_pid',params)))) which.rows <- which(duplicated(ages[,-2])) ages <- ages[-which.rows,-which(names(ages) == 'visit')] ages$age_at_first_nade <- nade_outcome.df[match(ages$cfar_pid,nade_outcome.df$cfar_pid,nomatch=NA),'age_at_first_nade'] ages$age_at_first_mh_nade <- mh_nade_outcome.df[match(ages$cfar_pid,mh_nade_outcome.df$cfar_pid,nomatch=NA),'age_at_first_mh_nade'] # ages$age_at_first_mh_nade_no_sa <- mh_nade_no_sa_outcome.df[match(ages$cfar_pid,mh_nade_no_sa_outcome.df$cfar_pid,nomatch=NA),'age_at_first_mh_nade'] ages$age_at_death <- data[match(ages$cfar_pid, data$cfar_pid, nomatch=NA),'age_at_death'] ages$age_at_t0 <- data[match(ages$cfar_pid, data$cfar_pid, nomatch=NA),'age_at_t0'] time_diff <- lapply(split(subset(ages, select=c(cfar_pid,age,age_at_t0)),ages$cfar_pid), FUN=function(y){ y <- y[order(y[,'age']),] diff. <- diff(y[,2]) df <- data.frame('cfar_pid'=y[-1,1], 'age'=y[-1,2], 'diff.'=diff., 'age_at_t0'=y[-1,'age_at_t0']) return(df)}) time_diff.df <- do.call('rbind',time_diff) time_diff.df$year_gap <- ifelse(time_diff.df$diff. > 1 & time_diff.df$age > time_diff.df$age_at_t0,1,0) which.rows <- which(time_diff.df$year_gap == 1) time_diff.df$age_after_year_gap <- NA time_diff.df$age_after_year_gap[which.rows] <- time_diff.df$age[which.rows] # Three are some IDs with multiple gaps of more than a year. Want the first one so wil order by ID and age and then take the first for that ID tmp <- subset(time_diff.df, !is.na(time_diff.df$age_after_year_gap), select=c(cfar_pid,age_after_year_gap)) tmp <- tmp[order(tmp$cfar_pid,tmp$age_after_year_gap),] tmp <- subset(tmp, !duplicated(cfar_pid)) ages$age_after_year_gap <- tmp[match(ages$cfar_pid,tmp$cfar_pid,nomatch=NA),'age_after_year_gap'] ages <- ages[order(ages$cfar_pid,ages$age),] sub_ages <- subset(ages, is.na(age_after_year_gap) | (age < age_after_year_gap & age >= age_at_t0)) # Finding last age of last record for subject last_age <- lapply(split(subset(sub_ages, select=c(cfar_pid, age)),sub_ages$cfar_pid), FUN=function(y){ last_age <- max(y[,'age']) df <- data.frame('cfar_pid'=y[1,'cfar_pid'], 'last_age'=last_age) return(df)}) last_age.df <- do.call('rbind',last_age) sub_ages$last_age <- last_age.df[match(sub_ages$cfar_pid, last_age.df$cfar_pid, nomatch=NA),'last_age'] # It could be that age_at_first_... happened after year gap so need to reset those to NA if that is true sub_ages$age_at_first_nade <- ifelse(sub_ages$last_age < sub_ages$age_at_first_nade, NA, sub_ages$age_at_first_nade) sub_ages$age_at_first_mh_nade <- ifelse(sub_ages$last_age < sub_ages$age_at_first_mh_nade, NA, sub_ages$age_at_first_mh_nade) # sub_ages$age_at_first_mh_nade_no_sa <- ifelse(sub_ages$last_age < sub_ages$age_at_first_mh_nade_no_sa, NA, sub_ages$age_at_first_mh_nade_no_sa) sub_ages$age_at_death <- ifelse(sub_ages$last_age < sub_ages$age_at_death, NA, sub_ages$age_at_death) # Determining the maxium follow-up time for any person regardless of outcome max_age <- lapply(split(subset(sub_ages, select=c(cfar_pid,age_at_first_nade,age_at_first_mh_nade,age_at_death,last_age)),sub_ages$cfar_pid), FUN=function(y){ max_age <- max(y[,-1],na.rm=TRUE) df <- data.frame('cfar_pid'=y[1,'cfar_pid'], 'max_age'=max_age) return(df)}) max_age.df <- do.call('rbind',max_age) sub_ages$max_age <- max_age.df[match(sub_ages$cfar_pid, max_age.df$cfar_pid, nomatch=NA),'max_age'] sub_ages <- sub_ages[order(sub_ages$cfar_pid,sub_ages$age),] sub_ages$month <- with(sub_ages, (age - age_at_t0)*12+1) sub_ages$month_rounded <- ceiling(sub_ages$month) # Starting monthly data frame by_month <- lapply(split(subset(sub_ages, select=c(cfar_pid, month_rounded)), sub_ages$cfar_pid),FUN=function(y){ followup_time <- seq(1,max(y[,'month_rounded'])) df <- data.frame('cfar_pid'=rep(y[1,1],times=length(followup_time)), 'month'=followup_time) return(df)}) time.df <- do.call('rbind',by_month) time.df$age_at_t0 <- sub_ages[match(time.df$cfar_pid, sub_ages$cfar_pid, nomatch=NA),'age_at_t0'] time.df$age <- ifelse(time.df$month == 1, time.df$age_at_t0, NA) time.df$age <- ifelse(time.df$month > 1, (time.df$age_at_t0 + (time.df$month-1)/12),time.df$age) # Adding in fixed data time.df$age_at_death <- data[match(time.df$cfar_pid,data$cfar_pid,nomatch=NA),'age_at_death'] time.df$age_at_datedelivery <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'age_at_datedelivery'] time.df$age_at_first_haart_end <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'age_at_first_haart_end'] time.df$age_at_first_haart_start <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'age_at_first_haart_start'] time.df$age_at_first_hcp_start <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'age_at_first_hcp_start'] time.df$age_at_first_hcp_end <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'age_at_first_hcp_end'] time.df$age_at_first_visit <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'age_at_first_visit'] time.df$age_at_last_visit <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'age_at_last_visit'] time.df$baseline_hemoglobin <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'baseline_hemoglobin'] time.df$baseline_cd4_percent <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'baseline_cd4_percent'] time.df$baseline_cd4_count <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'baseline_cd4_count'] time.df$baseline_vl <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'baseline_vl'] time.df$prior_hcp_t0 <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'hcp_prior_t0'] time.df$non_idu_prior_t0 <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'non_idu_prior_t0'] time.df$prior_haart <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'prior_haart'] time.df$prior_non_haart_art <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'prior_non_haart_art'] time.df$race <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'race'] time.df$race.factor <- factor(time.df$race, levels=c(0:10), labels=c('Undefined','Caucasian','AA','Hispanic','Native American/Eskimo', 'Asian','Pacific Islander','Indian','Middle Eastern','Other','Mixed race')) time.df$race_combined <- ifelse(time.df$race %in% c(0,3,4,5,6,7,8,9,10),0,time.df$race) time.df$race_combined.factor <- factor(time.df$race_combined, levels=c(0,1,2), labels=c('Other','Caucasian','AA')) time.df$race_2level <- ifelse(time.df$race_combined == 1 & !is.na(time.df$race_combined),1, ifelse(!is.na(time.df$race_combined) & time.df$race_combined %in% c(0,2),0,NA)) time.df$race_2level.factor <- factor(time.df$race_2level, levels=c(0,1), labels=c('Non-white','White')) time.df$smoking_during_t0 <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'smoking_during_t0'] time.df$year_of_first_haart <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'year_of_first_haart'] time.df$year_of_t0 <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'year_of_t0'] time.df$sex <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'sex'] time.df$prior_mh_non_ade <- mh_nade[match(time.df$cfar_pid,mh_nade$cfar_pid,nomatch=NA),'prior_mh_non_ade'] time.df$prior_mh_non_ade_no_sa <- mh_nade[match(time.df$cfar_pid,mh_nade$cfar_pid,nomatch=NA),'prior_mh_non_ade_no_sa'] time.df$prior_mh_non_ade_only <- mh_nade[match(time.df$cfar_pid,mh_nade$cfar_pid,nomatch=NA),'prior_mh_non_ade_only'] time.df$prior_non_ade <- oth_nade[match(time.df$cfar_pid,oth_nade$cfar_pid,nomatch=NA),'prior_non_ade'] time.df$cause_of_death <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'cause_of_death'] time.df$route_of_infection <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'risk1'] time.df$route_of_infection <- ifelse(time.df$route_of_infection == '',NA,time.df$route_of_infection) # Collapsing 'Other' which has 8 into 'Unknown' time.df$route_of_infection <- ifelse(time.df$route_of_infection == 'Other','Unknown',time.df$route_of_infection) time.df$route_of_infection <- factor(time.df$route_of_infection) time.df$route_of_infection_combined <- ifelse(time.df$route_of_infection == 'IDU' & !is.na(time.df$route_of_infection),1, ifelse(time.df$route_of_infection %in% c('Hetero','Unknown'),0,NA)) time.df$route_of_infection_combined.factor <- factor(time.df$route_of_infection_combined, levels=c(0,1), labels=c('Non-IDU','IDU')) time.df$secondary_route_of_infection <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'risk2'] time.df$secondary_route_of_infection <- ifelse(time.df$secondary_route_of_infection == '',NA,time.df$secondary_route_of_infection) time.df$prior_ade <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'prior_ade'] time.df$prior_hcv_t0 <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'hcv_prior_t0'] time.df$pid.month <- paste(time.df$cfar_pid, time.df$month, sep='.') # Adding in CD4 counts to monthly data params <- grep('cd4_count',names(data),value=TRUE) params <- params[-1] # removing baseline_cd4_count cd4_count <- reshape(data, params, v.names='cd4_count',timevar='lab_visit',idvar='cfar_pid',direction='long',sep=',', drop=setdiff(names(data),c('cfar_pid',params))) params <- grep('age_at_lab',names(data),value=TRUE) age_at_lab <- reshape(data, params, v.names='age_at_lab',timevar='lab_visit',idvar='cfar_pid',direction='long',sep=',', drop=setdiff(names(data), c('cfar_pid',params))) params <- grep('hiv1_rna',names(data),value=TRUE) hiv1_rna <- reshape(data, params, v.names='hiv1_rna',timevar='lab_visit',idvar='cfar_pid',direction='long',sep=',', drop=setdiff(names(data),c('cfar_pid',params))) labs <- merge(cd4_count,hiv1_rna,all=TRUE) labs <- merge(labs, age_at_lab, all=TRUE) labs$baseline_cd4_count <- data[match(labs$cfar_pid, data$cfar_pid, nomatch=NA),'baseline_cd4_count'] labs$baseline_vl <- data[match(labs$cfar_pid, data$cfar_pid, nomatch=NA),'baseline_vl'] labs <- labs[,-2] labs$age_at_t0 <- data[match(labs$cfar_pid, data$cfar_pid, nomatch=NA),'age_at_t0'] labs$month <- with(labs, (age_at_lab - age_at_t0)*12+1) labs$month_rounded <- ceiling(labs$month) labs$pid.month <- paste(labs$cfar_pid, labs$month_rounded, sep='.') # To add CD4 count to monthly data frame: # For those PIDs who have duplicated records in a month, take the non-missing CD4 count or the first CD4 count in the month if # all are non-missing labs <- labs[order(labs$cfar_pid, labs$month_rounded, labs$cd4_count, na.last=TRUE),] which.rows <- which(duplicated(labs$pid.month) & is.na(labs$cd4_count)) sub_labs <- labs[-which.rows,] sub_labs <- sub_labs[order(sub_labs$cfar_pid, sub_labs$age_at_lab),] sub_labs$cd4_count <- ifelse(sub_labs$month_rounded == 1 & (sub_labs$cd4_count != sub_labs$baseline_cd4_count | (is.na(sub_labs$cd4_count) & !is.na(sub_labs$baseline_cd4_count))), sub_labs$baseline_cd4_count,sub_labs$cd4_count) time.df$cd4_count <- sub_labs[match(time.df$pid.month, sub_labs$pid.month,nomatch=NA),'cd4_count'] time.df$cd4_count <- ifelse(time.df$month == 1 & is.na(time.df$cd4_count) & !is.na(time.df$baseline_cd4_count), time.df$baseline_cd4_count,time.df$cd4_count) # Repeat for HIV1-RNA labs <- labs[order(labs$cfar_pid, labs$month_rounded, labs$hiv1_rna, na.last=TRUE),] which.rows <- which(duplicated(labs$pid.month) & is.na(labs$hiv1_rna)) sub_labs <- labs[-which.rows,] sub_labs <- sub_labs[order(sub_labs$cfar_pid, sub_labs$age_at_lab),] time.df$hiv1_rna <- sub_labs[match(time.df$pid.month, sub_labs$pid.month,nomatch=NA),'hiv1_rna'] sub_labs$hiv1_rna <- ifelse(sub_labs$month_rounded == 1 & (sub_labs$hiv1_rna != sub_labs$baseline_vl | (is.na(sub_labs$hiv1_rna) & !is.na(sub_labs$baseline_vl))), sub_labs$baseline_vl,sub_labs$hiv1_rna) # Lower limits of detection for viral loads seem to vary so will set all to 399 time.df$hiv1_rna <- sub_labs[match(time.df$pid.month, sub_labs$pid.month, nomatch=NA),'hiv1_rna'] time.df$hiv1_rna <- ifelse(time.df$hiv1_rna < 399 & !is.na(time.df$hiv1_rna),399,time.df$hiv1_rna) time.df$hiv1_rna <- ifelse(time.df$month == 1 & is.na(time.df$hiv1_rna) & !is.na(time.df$baseline_vl), time.df$baseline_vl,time.df$hiv1_rna) # Adding in regimen information # First need to define those that straddle age_at_t0 and modify start date for those; # Will also omit those whose start and end date occurred before age_at_t0 params <- grep('age_at_reg_start_',names(data),value=TRUE) reg_start <- na.omit(reshape(data, params, timevar='regimen',idvar='cfar_pid',direction='long',sep='', drop=setdiff(names(data),c('cfar_pid',params)))) reg_start$age <- reg_start$age_at_reg_start_date params <- grep('age_at_reg_end_',names(data),value=TRUE) reg_end <- na.omit(reshape(data, params, timevar='regimen',idvar='cfar_pid',direction='long',sep='', drop=setdiff(names(data),c('cfar_pid',params)))) reg_end$age <- reg_end$age_at_reg_end_date # reg_end$regimen <- reg_end$reg_end reg_total <- merge(subset(reg_start, select=c(cfar_pid,regimen,age_at_reg_start_date)), subset(reg_end, select=c(cfar_pid,regimen,age_at_reg_end_date)),by=c('cfar_pid','regimen'),all=TRUE) reg_total$age_at_t0 <- data[match(reg_total$cfar_pid, data$cfar_pid, nomatch=NA),'age_at_t0'] reg_total$straddle <- ifelse(reg_total$age_at_reg_start_date < reg_total$age_at_t0 & reg_total$age_at_reg_end_date > reg_total$age_at_t0,1,0) reg_total$omit <- ifelse(reg_total$age_at_reg_start_date < reg_total$age_at_t0 & reg_total$age_at_reg_end_date < reg_total$age_at_t0,1,0) reg_total <- subset(reg_total, omit == 0) reg_total$age_at_reg_start_date_modified <- ifelse(reg_total$straddle == 1,reg_total$age_at_t0, reg_total$age_at_reg_start_date) reg_total$pid.reg <- paste(reg_total$cfar_pid,reg_total$regimen,sep='.') #--------------------------# new.dat <- reshape(subset(reg_total, select=c(cfar_pid, age_at_reg_start_date_modified,age_at_reg_end_date)), timevar='regimen',idvar='pid.reg', varying=list(c('age_at_reg_start_date_modified','age_at_reg_end_date')),v.names='age',direction='long') new.dat$count <- rep(c(1L,-1L),each=(nrow(new.dat)/2)) new.dat$reg_ind <- 0L times <- time.df[,c('cfar_pid','age')] vars <- merge(new.dat, times, by=c('cfar_pid','age'),all=TRUE, sort=TRUE) vars$count[is.na(vars$count)] <- 0L split(vars$reg_ind, vars$cfar_pid) <- lapply(split(vars$count, vars$cfar_pid), function(x) cumsum(x) + as.integer(x == -1L)) new.time.df <- merge(time.df, vars[,c('cfar_pid','age','reg_ind')],by=c('cfar_pid','age'),all.x=TRUE) time.df <- new.time.df rm(new.time.df) #--------------------------# # Also need reg_date_type # For those that straddle age_at_t0, need to get age_at_reg_start_date_modified for below reg_type <- merge(subset(reg_start, select=c(cfar_pid,regimen,age,age_at_reg_start_date)), subset(reg_end, select=c(cfar_pid, regimen,age,age_at_reg_end_date)),all=TRUE) reg_type$pid.reg <- paste(reg_type$cfar_pid, reg_type$regimen,sep='.') reg_type$age_at_reg_start_date_modified <- reg_total[match(reg_type$pid.reg, reg_total$pid.reg, nomatch=NA),'age_at_reg_start_date_modified'] reg_type$reg_date_type <- ifelse(!is.na(reg_type$age_at_reg_start_date_modified) & is.na(reg_type$age_at_reg_end_date),'START', ifelse(is.na(reg_type$age_at_reg_start_date) & !is.na(reg_type$age_at_reg_end_date),'END',NA)) reg_type$age_at_t0 <- data[match(reg_type$cfar_pid, data$cfar_pid, nomatch=NA),'age_at_t0'] reg_type$age_after_year_gap <- ages[match(reg_type$cfar_pid,ages$cfar_pid,nomatch=NA),'age_after_year_gap'] reg_type <- subset(reg_type, age >= age_at_t0 & (is.na(age_after_year_gap) | age < age_after_year_gap)) reg_type <- reg_type[order(reg_type$cfar_pid, reg_type$age),] reg_type$month <- with(reg_type, (age - age_at_t0)*12+1) reg_type$month_rounded <- ceiling(reg_type$month) reg_type$pid.month <- paste(reg_type$cfar_pid,reg_type$month_rounded, sep='.') reg_type <- reg_type[order(reg_type$cfar_pid,reg_type$age),] # Calculating reg_date_type_modified_full junk <- lapply(split(subset(reg_type, select=c(reg_date_type,pid.month)),reg_type$pid.month), FUN=function(y){ if(all(is.na(y[,1]))){ x <- NA x.full <- NA } else { x <- paste(unique(y[!is.na(y[,1]),1]), collapse=',') x.full <- paste(y[!is.na(y[,1]),1], collapse=',') } if(is.na(x)){ j <- NA } else if(paste(unique(x,collapse=',')) == 'START,END'){ j <- 'BOTH' } else if(paste(unique(x,collapse=',')) == 'END,START'){ j <- 'END,START' } else { j <- unique(y[!is.na(y[,1]),1]) } df <- data.frame('pid.month'=y[1,2], 'reg_date_type_modified'=j, 'reg_date_type_modified_full'=x.full) return(df) }) junk.df <- do.call('rbind',junk) reg_type$reg_date_type_modified <- junk.df[match(reg_type$pid.month,junk.df$pid.month,nomatch=NA),'reg_date_type_modified'] reg_type$reg_date_type_modified_full <- junk.df[match(reg_type$pid.month,junk.df$pid.month,nomatch=NA),'reg_date_type_modified_full'] time.df$reg_date_type_modified <- reg_type[match(time.df$pid.month,reg_type$pid.month,nomatch=NA),'reg_date_type_modified'] time.df$reg_date_type_modified_full <- reg_type[match(time.df$pid.month,reg_type$pid.month,nomatch=NA),'reg_date_type_modified_full'] # Need to change those who have 0 at reg_date_type_modified = 'END' from 0 to 1 junk <- lapply(split(subset(time.df, select=c(cfar_pid,reg_date_type_modified,reg_ind)),time.df$cfar_pid),FUN=function(y){ which.end <- which(y[,'reg_date_type_modified'] == 'END' & !is.na(y[,'reg_date_type_modified'])) if(length(which.end) == 1 && which.end == 1){ y[1,'reg_ind'] <- 1 } else if(1 %in% which.end & length(which.end) > 1){ y[which.end,'reg_ind'] <- c(1,y[(which.end[-1]-1),'reg_ind']) } else if(length(which.end) > 0){ y[which.end,'reg_ind'] <- y[(which.end - 1),'reg_ind'] } df <- data.frame('cfar_pid'=y[,'cfar_pid'], 'reg_ind_modified'=y[,'reg_ind']) return(df)}) junk.df <- do.call('rbind',junk) time.df <- cbind(time.df,junk.df$reg_ind_modified) names(time.df)[dim(time.df)[2]] <- 'reg_count_modified' time.df$reg_indicator <- ifelse(time.df$reg_count_modified > 0,1,0) # A few with 'BOTH' or 'END,START' had 0 for reg_indicator and reg_count_modified so need to change that time.df$reg_count_modified <- ifelse(time.df$reg_date_type_modified %in% c('BOTH','END,START') & time.df$reg_count_modified == 0,1,time.df$reg_count_modified) time.df$reg_indicator <- ifelse(time.df$reg_date_type_modified %in% c('BOTH','END,START') & time.df$reg_indicator == 0,1,time.df$reg_indicator) straddle <- tapply(reg_total$straddle, INDEX=reg_total$cfar_pid, FUN=max) straddle.df <- data.frame('cfar_pid'=names(straddle), 'straddle'=straddle) time.df$straddle <- straddle.df[match(time.df$cfar_pid, straddle.df$cfar_pid, nomatch=NA),'straddle'] which.row <- which(!duplicated(time.df$cfar_pid) & time.df$straddle == 1) time.df$reg_date_type_modified[which.row] <- 'START' # #------------------------------# params <- grep('haart[0123456789]',names(data),value=TRUE) haart <- na.omit(reshape(data, params, v.names='haart_ind',timevar='reg',idvar='cfar_pid',direction='long',sep=',', drop=setdiff(names(data),c('cfar_pid',params)))) haart <- haart[order(haart$cfar_pid,haart$reg),] haart$pid.reg <- paste(haart$cfar_pid,haart$reg, sep='.') # reg_total is subsetted on those that happened after age_at_t0 and before age_after_year_gap reg_total$haart_indicator <- haart[match(reg_total$pid.reg, haart$pid.reg,nomatch=NA),'haart_ind'] sub_reg_total <- subset(reg_total, haart_indicator == 1) haart.new.dat <- reshape(subset(sub_reg_total, select=c(cfar_pid, age_at_reg_start_date_modified,age_at_reg_end_date)), timevar='regimen',idvar='pid.reg',varying=list(c('age_at_reg_start_date_modified','age_at_reg_end_date')), v.names='age',direction='long') haart.new.dat$count <- rep(c(1L,-1L),each=(nrow(haart.new.dat)/2)) haart.new.dat$haart_ind <- 0L times <- time.df[,c('cfar_pid','age')] haart.vars <- merge(haart.new.dat, times, by=c('cfar_pid','age'),all=TRUE, sort=TRUE) haart.vars$count[is.na(haart.vars$count)] <- 0L split(haart.vars$haart_ind, haart.vars$cfar_pid) <- lapply(split(haart.vars$count, haart.vars$cfar_pid), function(x) cumsum(x) + as.integer(x == -1L)) new.time.df <- merge(time.df, haart.vars[,c('cfar_pid','age','haart_ind')],by=c('cfar_pid','age'),all.x=TRUE) time.df <- new.time.df rm(new.time.df) # To correct where at 'END' or 'BOTH' haart_ind is 0 and not 1 time.df$haart_ind <- ifelse(time.df$reg_date_type_modified %in% c('END','BOTH') & time.df$reg_count_modified == 1 & time.df$haart_ind == 0,1, time.df$haart_ind) @ <>= ids.with.missing.baseline.cd4 <- time.df$cfar_pid[which(is.na(time.df$cd4_count) & !duplicated(time.df$cfar_pid))] ids.with.missing.baseline.vl <- time.df$cfar_pid[which(is.na(time.df$hiv1_rna) & !duplicated(time.df$cfar_pid))] # Need to hard code baseline CD4 for the 14 PIDs who have missing baseline CD4 counts # PIDs with missing baseline CD4 counts: # 993179A: CD4 count 4.4 months after baseline (was on c-ART) # 993201K: delete; closest CD4 is 63.3 months after baseline # 993344K: CD4 count 4.4 months before baseline (not on c-ART when measured) # 993634E: CD4 count 9.2 months after baseline (on c-ART when measured) # 993875G: delete; closest CD4 is 67.4 months after baseline # 993918D: CD4 count 4.6 months before baseline (not on c-ART when measured; only 1 record in data) # 994413A: delete # 994768B: delete; closest CD4 is 30.0 months after baseline # 994900I: delete; closest CD4 is 28.2 months after baseline # 994977K: CD4 count 4.7 months after baseline (on and off c-ART repeatedly) # 995212F: delete; CD4 count 20.4 months after baseline # 995832C: avg CD4 count 15.2 months before baseline and CD4 count 4.7 months after baseline (not on c-ART before; on after) # 996128E: avg CD4 count 5.6 months before baseline and CD4 count 7.1 months after baseline (on c-ART before and after baseline) # 996405H: delete ids.2.delete <- c('993201K','993875G','994413A','994768B','994900I','995212F','996405H') ids.2.alter <- c('993179A','993344K','993634E','993918D','994977K','995832C','996128E') time.df <- subset(time.df, cfar_pid %nin% ids.2.delete) time.df$cfar_pid <- factor(time.df$cfar_pid) time.df$baseline_cd4_count_b <- ifelse(time.df$cfar_pid == '993179A',280, ifelse(time.df$cfar_pid == '993344K',20, ifelse(time.df$cfar_pid == '993634E',182, ifelse(time.df$cfar_pid == '993918D',20, ifelse(time.df$cfar_pid == '994977K',351, ifelse(time.df$cfar_pid == '995832C',((78+148)/2), ifelse(time.df$cfar_pid == '996128E', ((73+50)/2),time.df$baseline_cd4_count))))))) junk <- data.frame('cfar_pid'=ids.2.alter, 'new_cd4'=c(280,20,182,20,351,(78+148)/2,(73+50)/2)) which.row <- which(time.df$cfar_pid %in% ids.2.alter & time.df$month == 1) time.df$cd4_count[which.row] <- junk$new_cd4 ids.2.alter.vl <- ids.with.missing.baseline.vl[which(ids.with.missing.baseline.vl %in% ids.2.alter)] time.df$baseline_vl_b <- ifelse(time.df$cfar_pid == '993344K',400000, ifelse(time.df$cfar_pid == '993918D',408582, ifelse(time.df$cfar_pid == '994977K',1160, ifelse(time.df$cfar_pid == '995832C',((100001+49)/2), time.df$baseline_vl)))) junk <- data.frame('cfar_pid'=ids.2.alter.vl, 'new_vl'=c(400000, 408582, 1160, (100001+399)/2)) which.row <- which(time.df$cfar_pid %in% ids.2.alter.vl & time.df$month == 1) time.df$hiv1_rna[which.row] <- junk$new_vl @ <>= # Dragging down CD4 counts and viral loads and age at which those measurements made time.df$cd4_count_b <- unsplit(lapply(split(time.df$cd4_count, time.df$cfar_pid), function(x){L <- !is.na(x) x[c(NA,which(L))[cumsum(L)+1]]}), time.df$cfar_pid) time.df$age_at_cd4_count <- unsplit(lapply(split(subset(time.df, select=c(cfar_pid,cd4_count,age)),time.df$cfar_pid), function(x){L <- !is.na(x[,'cd4_count']) x[c(NA,which(L))[cumsum(L)+1],'age']}), time.df$cfar_pid) time.df$hiv1_rna_b <- unsplit(lapply(split(time.df$hiv1_rna, time.df$cfar_pid), function(x){L <- !is.na(x) x[c(NA,which(L))[cumsum(L)+1]]}), time.df$cfar_pid) time.df$log10_hiv1_rna_b <- log10(time.df$hiv1_rna_b) time.df$undetectable_vl <- ifelse(time.df$hiv1_rna_b == 399 & !is.na(time.df$hiv1_rna_b),1, ifelse(!is.na(time.df$hiv1_rna_b),0,NA)) time.df$age_at_hiv1_rna <- unsplit(lapply(split(subset(time.df, select=c(cfar_pid,hiv1_rna,age)),time.df$cfar_pid), function(x){L <- !is.na(x[,'hiv1_rna']) x[c(NA,which(L))[cumsum(L)+1],'age']}), time.df$cfar_pid) #------------------------------------------# # Now need to add in hormonal contraceptive information params <- grep('age_at_hcp_start',names(data),value=TRUE) hcp_start <- na.omit(reshape(data, params, timevar='regimen',idvar='cfar_pid',direction='long',sep='', drop=setdiff(names(data),c('cfar_pid',params)))) hcp_start$age <- hcp_start$age_at_hcp_start_date params <- grep('hcp_title',names(data), value=TRUE) params <- params[-which(params == 'first_hcp_title')] data$hcp_title1 <- ifelse(data$hcp_title1 == '',NA,data$hcp_title1) data$hcp_title2 <- ifelse(data$hcp_title2 == '',NA,data$hcp_title2) data$hcp_title3 <- ifelse(data$hcp_title3 == '',NA,data$hcp_title3) data$hcp_title4 <- ifelse(data$hcp_title4 == '',NA,data$hcp_title4) data$hcp_title5 <- ifelse(data$hcp_title5 == '',NA,data$hcp_title5) hcp_type <- na.omit(reshape(data, params, timevar='regimen',idvar='cfar_pid',direction='long',sep='', drop=setdiff(names(data),c('cfar_pid',params)))) hcp_start <- merge(hcp_start, hcp_type, all=TRUE) params <- grep('age_at_hcp_end',names(data),value=TRUE) hcp_end <- na.omit(reshape(data, params, timevar='regimen',idvar='cfar_pid',direction='long',sep='', drop=setdiff(names(data),c('cfar_pid',params)))) hcp_end$age <- hcp_end$age_at_hcp_end_date hcp_total <- merge(subset(hcp_start, select=c(cfar_pid,regimen,age_at_hcp_start_date,hcp_title)), subset(hcp_end, select=c(cfar_pid,regimen,age_at_hcp_end_date)), by=c('cfar_pid','regimen'),all=TRUE) hcp_total$age_at_t0 <- data[match(hcp_total$cfar_pid, data$cfar_pid, nomatch=NA),'age_at_t0'] hcp_total$age_after_year_gap <- ages[match(hcp_total$cfar_pid,ages$cfar_pid,nomatch=NA),'age_after_year_gap'] hcp_total$straddle <- ifelse(hcp_total$age_at_hcp_start_date < hcp_total$age_at_t0 & hcp_total$age_at_hcp_end_date > hcp_total$age_at_t0,1,0) hcp_total$end_straddle <- ifelse(hcp_total$age_at_hcp_start_date < hcp_total$age_after_year_gap & hcp_total$age_at_hcp_end_date >= hcp_total$age_after_year_gap,1,0) hcp_total$omit <- ifelse(hcp_total$age_at_hcp_start_date < hcp_total$age_at_t0 & hcp_total$age_at_hcp_end_date <= hcp_total$age_at_t0,1,0) hcp_total <- subset(hcp_total, omit == 0) hcp_total$age_at_hcp_start_date_modified <- ifelse(hcp_total$straddle == 1, hcp_total$age_at_t0, hcp_total$age_at_hcp_start_date) # Need to reorder regimens in hcp_total so that all start with 1 -- some don't because they had regimens # prior to baseline hcp_total$regimen <- NA split(hcp_total$regimen, hcp_total$cfar_pid) <- lapply(split(subset(hcp_total, select=c(regimen,cfar_pid)),hcp_total$cfar_pid), FUN=function(y){ 1:nrow(y)}) hcp_total$pid.hcp <- paste(hcp_total$cfar_pid, hcp_total$regimen,sep='.') hcp_total$pid.age <- paste(hcp_total$cfar_pid, hcp_total$age_at_hcp_start_date_modified,sep='.') hcp_total$hcp_title_category <- ifelse(hcp_total$hcp_title %in% c('Depo-Provera','Provera','Medroxyprogest Ace (Contracep)'), 'Depo-Provera','Other') # Adding in depo hcp_total_depo <- subset(hcp_total, hcp_title_category == 'Depo-Provera') new.dat.depo <- na.omit(reshape(subset(hcp_total_depo, select=c(cfar_pid,age_at_hcp_start_date_modified,age_at_hcp_end_date, hcp_title,hcp_title_category)),v.names='age', timevar='regimen',idvar='pid.hcp',direction='long',sep=',',varying=list(c('age_at_hcp_start_date_modified', 'age_at_hcp_end_date')))) new.dat.depo$pid.age <- paste(new.dat.depo$cfar_pid,new.dat.depo$age,sep='.') new.dat.depo$pid.regimen <- paste(new.dat.depo$cfar_pid,new.dat.depo$regimen,sep='.') hcp_total_depo$pid.regimen <- paste(hcp_total_depo$cfar_pid,hcp_total_depo$regimen,sep='.') new.dat.depo <- upData(new.dat.depo, rename=c('hcp_title'='hcp_title_depo', 'hcp_title_category'='hcp_depo_category')) new.dat.depo$count <- rep(c(1L,-1L),each=(nrow(new.dat.depo)/2)) new.dat.depo$hcp_ind <- 0L times <- time.df[,c('cfar_pid','age','month')] vars <- merge(new.dat.depo, times,by=c('cfar_pid','age'),all=TRUE,sort=TRUE) vars$count[is.na(vars$count)] <- 0L split(vars$hcp_ind, vars$cfar_pid) <- lapply(split(vars$count, vars$cfar_pid), function(x) cumsum(x) + as.integer(x == -1L)) vars$pid.hcp2 <- unsplit(lapply(split(vars$pid.hcp, vars$cfar_pid), function(x){L <- !is.na(x) x[c(NA,which(L))[cumsum(L)+1]]}), vars$cfar_pid) vars$pid.hcp2 <- ifelse(vars$hcp_ind == 0, NA,vars$pid.hcp2) vars$hcp_title_depo <- ifelse(vars$hcp_ind %in% c(1,2),'Depo-Provera',NA) vars$hcp_depo_category <- ifelse(vars$hcp_ind %in% c(1,2),'Depo-Provera',NA) vars$age_at_t0 <- time.df[match(vars$cfar_pid,time.df$cfar_pid,nomatch=NA),'age_at_t0'] vars$month2 <- round(12*(vars$age - vars$age_at_t0) + 1) vars$pid.month <- paste(vars$cfar_pid, vars$month2,sep='.') split(vars$hcp_ind, vars$pid.month) <- lapply(split(subset(vars,select=c(month2,hcp_ind)),vars$pid.month),FUN=function(y){ max(y[,'hcp_ind'],na.rm=TRUE)}) vars$hcp_title_depo <- ifelse(vars$hcp_ind %in% c(1,2), 'Depo-Provera',NA) vars$hcp_depo_category <- ifelse(vars$hcp_ind %in% c(1,2), 'Depo-Provera',NA) new.time.df <- merge(time.df, vars[,c('cfar_pid','age','hcp_ind','pid.hcp2','hcp_title_depo','hcp_depo_category')], by=c('cfar_pid','age'),all.x=TRUE) new.time.df <- upData(new.time.df, rename=c('hcp_ind'='hcp_depo_ind')) time.df <- new.time.df rm(new.time.df) # Oral hcp_total_other <- subset(hcp_total, hcp_title_category == 'Other') new.dat.other <- na.omit(reshape(subset(hcp_total_other, select=c(cfar_pid,age_at_hcp_start_date_modified,age_at_hcp_end_date, hcp_title,hcp_title_category)),v.names='age', timevar='regimen',idvar='pid.hcp',direction='long',sep=',',varying=list(c('age_at_hcp_start_date_modified', 'age_at_hcp_end_date')))) new.dat.other$pid.age <- paste(new.dat.other$cfar_pid,new.dat.other$age,sep='.') new.dat.other$pid.regimen <- paste(new.dat.other$cfar_pid,new.dat.other$regimen,sep='.') hcp_total_other$pid.regimen <- paste(hcp_total_other$cfar_pid,hcp_total_other$regimen,sep='.') new.dat.other <- upData(new.dat.other, rename=c('hcp_title'='hcp_title_other', 'hcp_title_category'='hcp_other_category')) new.dat.other$count <- rep(c(1L,-1L),each=(nrow(new.dat.other)/2)) new.dat.other$hcp_ind <- 0L times <- time.df[,c('cfar_pid','age','month')] vars <- merge(new.dat.other, times,by=c('cfar_pid','age'),all=TRUE,sort=TRUE) vars$count[is.na(vars$count)] <- 0L split(vars$hcp_ind, vars$cfar_pid) <- lapply(split(vars$count, vars$cfar_pid), function(x) cumsum(x) + as.integer(x == -1L)) vars$pid.hcp2 <- unsplit(lapply(split(vars$pid.hcp, vars$cfar_pid), function(x){L <- !is.na(x) x[c(NA,which(L))[cumsum(L)+1]]}), vars$cfar_pid) vars$pid.hcp2 <- ifelse(vars$hcp_ind == 0, NA,vars$pid.hcp2) vars$hcp_other_category <- ifelse(vars$hcp_ind %in% c(1,2),'Other',NA) vars$age_at_t0 <- time.df[match(vars$cfar_pid,time.df$cfar_pid,nomatch=NA),'age_at_t0'] vars$month2 <- round(12*(vars$age - vars$age_at_t0) + 1) vars$pid.month <- paste(vars$cfar_pid, vars$month2,sep='.') split(vars$hcp_ind, vars$pid.month) <- lapply(split(subset(vars,select=c(month2,hcp_ind)),vars$pid.month),FUN=function(y){ max(y[,'hcp_ind'],na.rm=TRUE)}) vars$hcp_other_category <- ifelse(vars$hcp_ind %in% c(1,2), 'Other',NA) vars$hcp_title_other <- unsplit(lapply(split(subset(vars,select=c(hcp_ind,hcp_title_other)), vars$cfar_pid), function(x){L <- !is.na(x[,'hcp_title_other']) & x[,'hcp_ind'] %in% c(1,2) x[c(NA,which(L))[cumsum(L)+1],'hcp_title_other']}), vars$cfar_pid) # The above code drags it down to all remaining records. Don't want it on records with hcp_ind == 0 so am fixing it in # next line of code vars$hcp_title_other <- ifelse(vars$hcp_ind == 0,NA,vars$hcp_title_other) vars$hcp_ind <- ifelse(vars$hcp_ind %in% c(1,2),1,0) vars$hcp_title_other_test <- NA split(vars$hcp_title_other_test, vars$pid.month) <- lapply(split(subset(vars,select=c(pid.month,hcp_title_other)), vars$pid.month),FUN=function(y){ if(all(is.na(y[,'hcp_title_other']))){ rep(NA,times=length(y[,'pid.month'])) } else if(length(y[which(!is.na(y[,'hcp_title_other'])),'hcp_title_other']) > 1){ # taking second unique if more than one in a month since that is # new regimen started, not stopped tmp <- unique(y[which(!is.na(y[,'hcp_title_other'])),'hcp_title_other']) if(length(tmp) > 1){ rep(unique(y[which(!is.na(y[,'hcp_title_other'])),'hcp_title_other'])[length(tmp)], times=length(y[,'pid.month'])) } else { rep(unique(y[which(!is.na(y[,'hcp_title_other'])),'hcp_title_other']), times=length(y[,'pid.month'])) } } else { rep(unique(y[which(!is.na(y[,'hcp_title_other'])),'hcp_title_other']), times=length(y[,'pid.month'])) } }) vars$hcp_title_other <- vars$hcp_title_other_test vars$hcp_other_category <- ifelse(vars$hcp_ind %in% c(1,2),'Other',NA) new.time.df <- merge(time.df, vars[,c('cfar_pid','age','hcp_ind','pid.hcp2','hcp_title_other','hcp_other_category')], by=c('cfar_pid','age'),all.x=TRUE) new.time.df <- upData(new.time.df, rename=c('hcp_ind'='hcp_other_ind')) time.df <- new.time.df rm(new.time.df) # Need to keep this to have a general hcp_ind for overall cumulative hcp use new.dat <- reshape(subset(hcp_total, select=c(cfar_pid,age_at_hcp_start_date_modified,age_at_hcp_end_date)), timevar='regimen',idvar='pid.hcp', varying=list(c('age_at_hcp_start_date_modified','age_at_hcp_end_date')), v.names='age',direction='long') new.dat$pid.age <- paste(new.dat$cfar_pid,new.dat$age,sep='.') new.dat$pid.regimen <- paste(new.dat$cfar_pid,new.dat$regimen,sep='.') hcp_total$pid.regimen <- paste(hcp_total$cfar_pid,hcp_total$regimen,sep='.') new.dat$count <- rep(c(1L,-1L),each=(nrow(new.dat)/2)) new.dat$hcp_ind <- 0L times <- time.df[,c('cfar_pid','age')] vars <- merge(new.dat, times,by=c('cfar_pid','age'),all=TRUE,sort=TRUE) vars$count[is.na(vars$count)] <- 0L vars$age_at_t0 <- time.df[match(vars$cfar_pid,time.df$cfar_pid,nomatch=NA),'age_at_t0'] vars$month2 <- round(12*(vars$age - vars$age_at_t0) + 1) vars$pid.month <- paste(vars$cfar_pid, vars$month2,sep='.') split(vars$hcp_ind, vars$cfar_pid) <- lapply(split(vars$count, vars$cfar_pid), function(x) cumsum(x) + as.integer(x == -1L)) split(vars$hcp_ind, vars$pid.month) <- lapply(split(subset(vars,select=c(month2,hcp_ind)),vars$pid.month),FUN=function(y){ max(y[,'hcp_ind'],na.rm=TRUE) }) vars$hcp_ind <- ifelse(vars$hcp_ind %in% c(1,2),1,0) vars$pid.hcp2 <- unsplit(lapply(split(vars$pid.hcp, vars$cfar_pid), function(x){L <- !is.na(x) x[c(NA,which(L))[cumsum(L)+1]]}), vars$cfar_pid) vars$pid.hcp2 <- ifelse(vars$hcp_ind == 0, NA,vars$pid.hcp2) new.time.df <- merge(time.df, vars[,c('cfar_pid','age','hcp_ind','pid.hcp2')], by=c('cfar_pid','age'),all.x=TRUE) time.df <- new.time.df rm(new.time.df) # Also need hcp_date_type hcp_type <- merge(subset(hcp_start, select=c(cfar_pid, regimen, age, age_at_hcp_start_date)), subset(hcp_end, select=c(cfar_pid, regimen, age, age_at_hcp_end_date)),all=TRUE) hcp_type$pid.hcp <- paste(hcp_type$cfar_pid,hcp_type$regimen,sep='.') hcp_type$age_at_hcp_start_date_modified <- hcp_total[match(hcp_type$pid.hcp, hcp_total$pid.hcp,nomatch=NA),'age_at_hcp_start_date_modified'] hcp_type$age <- ifelse(!is.na(hcp_type$age_at_hcp_start_date) & hcp_type$age_at_hcp_start_date_modified != hcp_type$age_at_hcp_start_date,hcp_type$age_at_hcp_start_date_modified, hcp_type$age) hcp_type$hcp_date_type <- ifelse(!is.na(hcp_type$age_at_hcp_start_date_modified) & is.na(hcp_type$age_at_hcp_end_date),'START', ifelse(is.na(hcp_type$age_at_hcp_start_date) & !is.na(hcp_type$age_at_hcp_end_date),'END',NA)) hcp_type$age_at_t0 <- data[match(hcp_type$cfar_pid, data$cfar_pid, nomatch=NA),'age_at_t0'] hcp_type$age_after_year_gap <- ages[match(hcp_type$cfar_pid,ages$cfar_pid,nomatch=NA),'age_after_year_gap'] hcp_type <- subset(hcp_type, age_at_hcp_start_date_modified >= age_at_t0 & (is.na(age_after_year_gap) | age_at_hcp_start_date_modified < age_after_year_gap)) hcp_type <- hcp_type[order(hcp_type$cfar_pid, hcp_type$age),] hcp_type$month <- with(hcp_type, (age - age_at_t0)*12+1) hcp_type$month_rounded <- round(hcp_type$month) hcp_type$pid.month <- paste(hcp_type$cfar_pid, hcp_type$month_rounded, sep='.') hcp_type <- hcp_type[order(hcp_type$cfar_pid, hcp_type$age),] junk <- tapply(hcp_type$hcp_date_type, INDEX=hcp_type$pid.month, FUN=function(y){ if(all(is.na(y))){ x <- NA } else { x <- paste(unique(y[!is.na(y)]),collapse=',') } if(is.na(x)){ j <- NA } else if(paste(unique(x,collapse=',')) == 'START,END'){ j <- 'BOTH' } else if(paste(unique(x,collapse=',')) == 'END,START'){ j <- 'END,START' } else { j <- unique(y[!is.na(y)]) }}) junk.df <- data.frame('pid.month'=names(junk), 'hcp_date_type'=junk) hcp_type$hcp_date_type_modified <- as.vector(junk.df[match(hcp_type$pid.month,junk.df$pid.month,nomatch=NA),'hcp_date_type']) time.df$hcp_date_type_modified <- hcp_type[match(time.df$pid.month,hcp_type$pid.month,nomatch=NA),'hcp_date_type_modified'] #--------------------------------# junk <- lapply(split(subset(time.df, select=c(cfar_pid,hcp_date_type_modified,hcp_ind)),time.df$cfar_pid),FUN=function(y){ which.end <- which(y[,'hcp_date_type_modified'] == 'END') if(length(which.end) > 0){ y[which.end,'hcp_ind'] <- y[(which.end - 1),'hcp_ind'] } df <- data.frame('cfar_pid'=y[,'cfar_pid'], 'hcp_ind_modified'=y[,'hcp_ind']) return(df)}) junk.df <- do.call('rbind',junk) time.df <- cbind(time.df, junk.df$hcp_ind_modified) names(time.df)[dim(time.df)[2]] <- 'hcp_count_modified' #---------------# oth_nade$age_at_t0 <- data[match(oth_nade$cfar_pid, data$cfar_pid, nomatch=NA),'age_at_t0'] oth_nade$age_after_year_gap <- ages[match(oth_nade$cfar_pid, ages$cfar_pid, nomatch=NA),'age_after_year_gap'] mh_nade$age_at_t0 <- data[match(mh_nade$cfar_pid, data$cfar_pid, nomatch=NA),'age_at_t0'] mh_nade$age_after_year_gap <- ages[match(mh_nade$cfar_pid, ages$cfar_pid, nomatch=NA),'age_after_year_gap'] mh_nade_only$age_at_t0 <- data[match(mh_nade_only$cfar_pid, data$cfar_pid, nomatch=NA),'age_at_t0'] mh_nade_only$age_after_year_gap <- data[match(mh_nade_only$cfar_pid, data$cfar_pid, nomatch=NA),'age_after_year_gap'] mh_nade_no_sa$age_at_t0 <- data[match(mh_nade_no_sa$cfar_pid, data$cfar_pid, nomatch=NA),'age_at_t0'] mh_nade_no_sa$age_after_year_gap <- data[match(mh_nade_no_sa$cfar_pid, data$cfar_pid, nomatch=NA),'age_after_year_gap'] # Finding first NADE that is after age_at_t0 and before age_after_year_gap params <- grep('age_at_nade_after_t0_date',names(oth_nade),value=TRUE) sub_oth_nade <- subset(oth_nade, !is.na(age_at_nade_after_t0_date1)) first_nade <- apply(subset(sub_oth_nade, select=c(params, 'age_at_t0','age_after_year_gap','cfar_pid')), 1, FUN=function(y){ age_at_t0 <- as.numeric(y['age_at_t0']) age_after_year_gap <- as.numeric(y['age_after_year_gap']) which.age <- min(which(y[params] >= age_at_t0 & (y[params] < age_after_year_gap | is.na(age_after_year_gap)))) if(which.age == Inf){ age_at_first_nade <- NA } else { age_at_first_nade <- as.numeric(y[which.age]) } if(!is.na(age_at_first_nade)){ nade <- 1 } else { nade <- 0 } df <- data.frame('cfar_pid'=y['cfar_pid'], 'age_of_first_nade'=age_at_first_nade, 'nade'=nade) return(df)}) first_nade.df <- do.call('rbind',first_nade) first_nade.df <- subset(first_nade.df, !is.na(age_of_first_nade)) any_nade <- apply(subset(sub_oth_nade, select=c(params, 'age_at_t0','age_after_year_gap','cfar_pid')),1,FUN=function(y){ age_at_t0 <- as.numeric(y['age_at_t0']) age_after_year_gap <- as.numeric(y['age_after_year_gap']) which.age <- which(y[params] >= age_at_t0 & (y[params] < age_after_year_gap | is.na(age_after_year_gap))) if(length(which.age) == 0){ any_nade.df <- data.frame('cfar_pid'=y['cfar_pid'], 'age_of_nade'=NA, 'month_of_nade'=NA) } else { month.age <- as.numeric(ceiling(as.numeric(y[which.age]) - age_at_t0)*12+1) any_nade.df <- data.frame('cfar_pid'=rep(y['cfar_pid'],length(which.age)), 'age_of_nade'=as.numeric(y[which.age]), 'month_of_nade'=month.age) } return(any_nade.df) }) any_nade.df <- do.call('rbind',any_nade) any_nade.df <- subset(any_nade.df, !is.na(age_of_nade)) any_nade.df$pid.month <- paste(any_nade.df$cfar_pid, any_nade.df$month_of_nade, sep='.') time.df$month_of_nade <- any_nade.df[match(time.df$pid.month, any_nade.df$pid.month, nomatch=NA),'month_of_nade'] params <- grep('age_at_mh_nade_after_t0_date',names(mh_nade),value=TRUE) sub_mh_nade <- subset(mh_nade, !is.na(age_at_mh_nade_after_t0_date1)) first_mh_nade <- apply(subset(sub_mh_nade, select=c(params, 'age_at_t0','age_after_year_gap','cfar_pid')), 1, FUN=function(y){ age_at_t0 <- as.numeric(y['age_at_t0']) age_after_year_gap <- as.numeric(y['age_after_year_gap']) which.age <- min(which(y[params] >= age_at_t0 & (y[params] < age_after_year_gap | is.na(age_after_year_gap)))) if(which.age == Inf){ age_at_first_mh_nade <- NA } else { age_at_first_mh_nade <- as.numeric(y[which.age]) } if(!is.na(age_at_first_mh_nade)){ mh_nade <- 1 } else { mh_nade <- 0 } df <- data.frame('cfar_pid'=y['cfar_pid'], 'age_of_first_mh_nade'=age_at_first_mh_nade, 'mh_nade'=mh_nade) return(df)}) first_mh_nade.df <- do.call('rbind',first_mh_nade) first_mh_nade.df <- subset(first_mh_nade.df, !is.na(age_of_first_mh_nade)) # mental health only NADEs params <- grep('age_mh_nade',names(mh_nade_only),value=TRUE) mh_nade_only$age_after_year_gap <- mh_nade[match(mh_nade_only$cfar_pid,mh_nade$cfar_pid,nomatch=NA),'age_after_year_gap'] mh_nade_only$age_at_t0 <- mh_nade[match(mh_nade_only$cfar_pid,mh_nade$cfar_pid,nomatch=NA),'age_at_t0'] mh_nade_only <- mh_nade_only[order(mh_nade_only$cfar_pid,mh_nade_only$age_mh_nade),] first_mh_nade_only.df <- mh_nade_only[!duplicated(mh_nade_only$cfar_pid),c('cfar_pid','age_mh_nade','mh_nade','age_at_t0','age_after_year_gap')] first_mh_nade_only.df <- subset(first_mh_nade_only.df, age_mh_nade >= age_at_t0 & (is.na(age_after_year_gap) | age_mh_nade < age_after_year_gap),select=c('cfar_pid','age_mh_nade','mh_nade')) names(first_mh_nade_only.df) <- c('cfar_pid','age_of_first_mh_nade_only','mh_nade_only') # mental health NADEs excluding SUBSTANCE ABUSE params <- grep('age_mh_nade',names(mh_nade_no_sa),value=TRUE) mh_nade_no_sa$age_after_year_gap <- mh_nade[match(mh_nade_no_sa$cfar_pid,mh_nade$cfar_pid,nomatch=NA),'age_after_year_gap'] mh_nade_no_sa$age_at_t0 <- mh_nade[match(mh_nade_no_sa$cfar_pid,mh_nade$cfar_pid,nomatch=NA),'age_at_t0'] mh_nade_no_sa <- mh_nade_no_sa[order(mh_nade_no_sa$cfar_pid,mh_nade_no_sa$age_mh_nade),] first_mh_nade_no_sa.df <- mh_nade_no_sa[!duplicated(mh_nade_no_sa$cfar_pid),c('cfar_pid','age_mh_nade','mh_nade','age_at_t0','age_after_year_gap')] first_mh_nade_no_sa.df <- subset(first_mh_nade_no_sa.df, age_mh_nade >= age_at_t0 & (is.na(age_after_year_gap) | age_mh_nade < age_after_year_gap),select=c('cfar_pid','age_mh_nade','mh_nade')) names(first_mh_nade_no_sa.df) <- c('cfar_pid','age_of_first_mh_nade_no_sa','mh_nade_no_sa') # Also need to get all mental health NADEs to create indicator if they have a mental health NADE prior to having another type of # NADE params <- grep('age_at_mh_nade_after_t0_date',names(mh_nade),value=TRUE) sub_mh_nade <- subset(mh_nade, !is.na(age_at_mh_nade_after_t0_date1)) any_mh_nade <- apply(subset(sub_mh_nade, select=c(params, 'age_at_t0','age_after_year_gap','cfar_pid')),1,FUN=function(y){ age_at_t0 <- as.numeric(y[7]) age_after_year_gap <- as.numeric(y[8]) which.age <- which(y[1:6] >= age_at_t0 & (y[1:6] < age_after_year_gap | is.na(age_after_year_gap))) if(length(which.age) == 0){ any_mh_nade.df <- data.frame('cfar_pid'=y[9], 'age_of_mh_nade'=NA, 'month_of_mh_nade'=NA) } else { month.age <- as.numeric(ceiling(as.numeric(y[which.age]) - age_at_t0)*12+1) any_mh_nade.df <- data.frame('cfar_pid'=rep(y[9],length(which.age)), 'age_of_mh_nade'=as.numeric(y[which.age]), 'month_of_mh_nade'=month.age) } return(any_mh_nade.df) }) any_mh_nade.df <- do.call('rbind',any_mh_nade) any_mh_nade.df <- subset(any_mh_nade.df, !is.na(age_of_mh_nade)) any_mh_nade.df$pid.month <- paste(any_mh_nade.df$cfar_pid, any_mh_nade.df$month_of_mh_nade, sep='.') time.df$month_of_mh_nade <- any_mh_nade.df[match(time.df$pid.month, any_mh_nade.df$pid.month, nomatch=NA),'month_of_mh_nade'] time.df$mh_nade_before_indicator <- ifelse(!is.na(time.df$month_of_mh_nade) & is.na(time.df$month_of_nade),1,0) time.df$age_of_first_nade <- first_nade.df[match(time.df$cfar_pid, first_nade.df$cfar_pid, nomatch=NA),'age_of_first_nade'] time.df$nade <- first_nade.df[match(time.df$cfar_pid, first_nade.df$cfar_pid,nomatch=NA),'nade'] time.df$nade[is.na(time.df$nade)] <- 0 time.df$age_of_first_mh_nade <- first_mh_nade.df[match(time.df$cfar_pid, first_mh_nade.df$cfar_pid, nomatch=NA),'age_of_first_mh_nade'] time.df$mh_nade <- first_mh_nade.df[match(time.df$cfar_pid, first_mh_nade.df$cfar_pid,nomatch=NA),'mh_nade'] time.df$mh_nade[is.na(time.df$mh_nade)] <- 0 time.df$age_of_first_mh_nade_only <- first_mh_nade_only.df[match(time.df$cfar_pid, first_mh_nade_only.df$cfar_pid, nomatch=NA),'age_of_first_mh_nade_only'] time.df$mh_nade_only <- first_mh_nade_only.df[match(time.df$cfar_pid, first_mh_nade_only.df$cfar_pid,nomatch=NA),'mh_nade_only'] time.df$mh_nade_only[is.na(time.df$mh_nade_only)] <- 0 time.df$age_of_first_mh_nade_no_sa <- first_mh_nade_no_sa.df[match(time.df$cfar_pid, first_mh_nade_no_sa.df$cfar_pid, nomatch=NA),'age_of_first_mh_nade_no_sa'] time.df$mh_nade_no_sa <- first_mh_nade_no_sa.df[match(time.df$cfar_pid, first_mh_nade_no_sa.df$cfar_pid,nomatch=NA),'mh_nade_no_sa'] time.df$mh_nade_no_sa[is.na(time.df$mh_nade_no_sa)] <- 0 # Need to add in indicator for pregnancy. Only have age of delivery so will have to go back 9 months to get age at pregnancy start time.df$age_at_subsequent_deliv_1 <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'age_at_subsequent_deliv_1'] time.df$month_at_subsequent_deliv_1 <- ceiling((time.df$age_at_subsequent_deliv_1 - time.df$age_at_t0)*12+1) time.df$age_at_subsequent_deliv_2 <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'age_at_subsequent_deliv_2'] time.df$month_at_subsequent_deliv_2 <- ceiling((time.df$age_at_subsequent_deliv_2 - time.df$age_at_t0)*12+1) time.df$age_at_subsequent_deliv_3 <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'age_at_subsequent_deliv_3'] time.df$month_at_subsequent_deliv_3 <- ceiling((time.df$age_at_subsequent_deliv_3 - time.df$age_at_t0)*12+1) # Estimated date of conception (edc) time.df$age_at_subsequent_edc_1 <- data[match(time.df$cfar_pid, data$cfar_pid,nomatch=NA),'age_at_subsequent_edc_1'] time.df$month_at_subsequent_edc_1 <- ceiling((time.df$age_at_subsequent_edc_1 - time.df$age_at_t0)*12+1) time.df$age_at_subsequent_edc_2 <- data[match(time.df$cfar_pid, data$cfar_pid,nomatch=NA),'age_at_subsequent_edc_2'] time.df$month_at_subsequent_edc_2 <- ceiling((time.df$age_at_subsequent_edc_2 - time.df$age_at_t0)*12+1) time.df$age_at_subsequent_edc_3 <- data[match(time.df$cfar_pid, data$cfar_pid,nomatch=NA),'age_at_subsequent_edc_3'] time.df$month_at_subsequent_edc_3 <- ceiling((time.df$age_at_subsequent_edc_3 - time.df$age_at_t0)*12+1) # Pregnancy indicator time.df$pregnancy_indicator <- ifelse(!is.na(time.df$month_at_subsequent_deliv_1) & time.df$month >= time.df$month_at_subsequent_edc_1 & time.df$month < time.df$month_at_subsequent_deliv_1,1, ifelse(!is.na(time.df$month_at_subsequent_deliv_2) & time.df$month >= time.df$month_at_subsequent_edc_2 & time.df$month < time.df$month_at_subsequent_deliv_2,1, ifelse(!is.na(time.df$month_at_subsequent_deliv_3) & time.df$month >= time.df$month_at_subsequent_edc_3 & time.df$month < time.df$month_at_subsequent_deliv_3,1,0))) time.df <- time.df[order(time.df$cfar_pid, time.df$age),] # Now need to come up with end of follow-up for each of the different outcomes # NADE (excluding mental health ones) # Need to add in what the last age is for every person and then if their age_at_death is greater than that, make it missing time.df$last_age <- NA split(time.df$last_age, time.df$cfar_pid) <- lapply(split(time.df$age, time.df$cfar_pid),FUN=function(y){ max(y)}) time.df$age_at_death <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'age_at_death'] time.df$age_at_death <- ifelse(!is.na(time.df$age_at_death) & time.df$age_at_death > time.df$last_age,NA, time.df$age_at_death) time.df$month_at_death <- ceiling((time.df$age_at_death - time.df$age_at_t0)*12) time.df <- subset(time.df, is.na(age_at_death) | age <= age_at_death) time.df$last_age <- NA split(time.df$last_age, time.df$cfar_pid) <- lapply(split(time.df$age, time.df$cfar_pid),FUN=function(y){ max(y)}) # For NADE outcome (excluding mental health) time.df$max_age_for_nade_outcome <- NA split(time.df$max_age_for_nade_outcome, time.df$cfar_pid) <- lapply(split(subset(time.df, select=c(last_age,age_of_first_nade)), time.df$cfar_pid),FUN=min, na.rm=TRUE) time.df$max_month_for_nade_outcome <- ceiling((time.df$max_age_for_nade_outcome - time.df$age_at_t0)*12) time.df$nade_outcome_indicator <- ifelse((time.df$month == time.df$max_month_for_nade_outcome | time.df$age == time.df$max_age_for_nade_outcome) & (!is.na(time.df$age_of_first_nade) & round(time.df$age_of_first_nade,5) <= round(time.df$last_age,5)),1,0) # For NADE outcome (including mental health) time.df$max_age_for_mh_nade_outcome <- NA split(time.df$max_age_for_mh_nade_outcome, time.df$cfar_pid) <- lapply(split(subset(time.df, select=c(last_age,age_of_first_mh_nade)), time.df$cfar_pid),FUN=min, na.rm=TRUE) time.df$max_month_for_mh_nade_outcome <- ceiling((time.df$max_age_for_mh_nade_outcome - time.df$age_at_t0)*12) time.df$mh_nade_outcome_indicator <- ifelse((time.df$month == time.df$max_month_for_mh_nade_outcome | time.df$age == time.df$max_age_for_mh_nade_outcome) & (!is.na(time.df$age_of_first_mh_nade) & time.df$age_of_first_mh_nade <= time.df$last_age),1,0) # For NADE outcome (ONLY mental health) time.df$max_age_for_mh_nade_only_outcome <- NA split(time.df$max_age_for_mh_nade_only_outcome, time.df$cfar_pid) <- lapply(split(subset(time.df, select=c(last_age,age_of_first_mh_nade_only)), time.df$cfar_pid),FUN=min, na.rm=TRUE) time.df$max_month_for_mh_nade_only_outcome <- ceiling((time.df$max_age_for_mh_nade_only_outcome - time.df$age_at_t0)*12) time.df$mh_nade_only_outcome_indicator <- ifelse((time.df$month == time.df$max_month_for_mh_nade_only_outcome | time.df$age == time.df$max_age_for_mh_nade_only_outcome) & (!is.na(time.df$age_of_first_mh_nade_only) & time.df$age_of_first_mh_nade_only <= time.df$last_age),1,0) # For NADE outcome (excluding SUBSTANCE ABUSE) time.df$max_age_for_mh_nade_no_sa_outcome <- NA split(time.df$max_age_for_mh_nade_no_sa_outcome, time.df$cfar_pid) <- lapply(split(subset(time.df, select=c(last_age,age_of_first_mh_nade_no_sa)), time.df$cfar_pid),FUN=min, na.rm=TRUE) time.df$max_month_for_mh_nade_no_sa_outcome <- ceiling((time.df$max_age_for_mh_nade_no_sa_outcome - time.df$age_at_t0)*12) time.df$mh_nade_no_sa_outcome_indicator <- ifelse((time.df$month == time.df$max_month_for_mh_nade_no_sa_outcome | time.df$age == time.df$max_age_for_mh_nade_no_sa_outcome) & (!is.na(time.df$age_of_first_mh_nade_no_sa) & time.df$age_of_first_mh_nade_no_sa <= time.df$last_age),1,0) # For death # For NADE outcome (excluding mental health) time.df$max_age_for_death_outcome <- NA split(time.df$max_age_for_death_outcome, time.df$cfar_pid) <- lapply(split(subset(time.df, select=c(last_age,age_at_death)), time.df$cfar_pid),FUN=min, na.rm=TRUE) time.df$max_month_for_death_outcome <- ceiling((time.df$max_age_for_death_outcome - time.df$age_at_t0)*12) time.df$death_outcome_indicator <- ifelse((time.df$month == time.df$max_month_for_death_outcome | time.df$age == time.df$max_age_for_death) & !is.na(time.df$age_at_death),1,0) # Combined NADE/Death outcome time.df$max_age_for_nade_dth_outcome <- NA split(time.df$max_age_for_nade_dth_outcome, time.df$cfar_pid) <- lapply(split(subset(time.df, select=c(last_age, age_of_first_nade,age_at_death)), time.df$cfar_pid),FUN=min, na.rm=TRUE) time.df$max_month_for_nade_dth_outcome <- ceiling((time.df$max_age_for_nade_dth_outcome - time.df$age_at_t0)*12) time.df$nade_dth_outcome_indicator <- ifelse((time.df$month == time.df$max_month_for_nade_dth_outcome | time.df$age == time.df$max_age_for_nade_dth_outcome) & ((!is.na(time.df$age_of_first_nade) & time.df$age_of_first_nade <= time.df$last_age) | !is.na(time.df$age_at_death)),1,0) # Combined mental health NADE/Death outcome time.df$max_age_for_mh_nade_dth_outcome <- NA split(time.df$max_age_for_mh_nade_dth_outcome, time.df$cfar_pid) <- lapply(split(subset(time.df, select=c(last_age, age_of_first_mh_nade,age_at_death)), time.df$cfar_pid),FUN=min, na.rm=TRUE) time.df$max_month_for_mh_nade_dth_outcome <- ceiling((time.df$max_age_for_mh_nade_dth_outcome - time.df$age_at_t0)*12) time.df$mh_nade_dth_outcome_indicator <- ifelse((time.df$month == time.df$max_month_for_mh_nade_dth_outcome | time.df$age == time.df$max_age_for_mh_nade_dth_outcome) & ((!is.na(time.df$age_of_first_mh_nade) & time.df$age_of_first_mh_nade <= time.df$last_age) | !is.na(time.df$age_at_death)),1,0) # Combined mental health only NADE/Death outcome time.df$max_age_for_mh_nade_only_dth_outcome <- NA split(time.df$max_age_for_mh_nade_only_dth_outcome, time.df$cfar_pid) <- lapply(split(subset(time.df, select=c(last_age, age_of_first_mh_nade_only,age_at_death)), time.df$cfar_pid),FUN=min, na.rm=TRUE) time.df$max_month_for_mh_nade_only_dth_outcome <- ceiling((time.df$max_age_for_mh_nade_only_dth_outcome - time.df$age_at_t0)*12) time.df$mh_nade_only_dth_outcome_indicator <- ifelse((time.df$month == time.df$max_month_for_mh_nade_only_dth_outcome | time.df$age == time.df$max_age_for_mh_nade_only_dth_outcome) & ((!is.na(time.df$age_of_first_mh_nade_only) & time.df$age_of_first_mh_nade_only <= time.df$last_age) | !is.na(time.df$age_at_death)),1,0) # Combined mental health NADE (excluding SUBSTANCE ABUSE)/Death outcome time.df$max_age_for_mh_nade_no_sa_dth_outcome <- NA split(time.df$max_age_for_mh_nade_no_sa_dth_outcome, time.df$cfar_pid) <- lapply(split(subset(time.df, select=c(last_age, age_of_first_mh_nade_no_sa,age_at_death)), time.df$cfar_pid),FUN=min, na.rm=TRUE) time.df$max_month_for_mh_nade_no_sa_dth_outcome <- ceiling((time.df$max_age_for_mh_nade_no_sa_dth_outcome - time.df$age_at_t0)*12) time.df$mh_nade_no_sa_dth_outcome_indicator <- ifelse((time.df$month == time.df$max_month_for_mh_nade_no_sa_dth_outcome | time.df$age == time.df$max_age_for_mh_nade_no_sa_dth_outcome) & ((!is.na(time.df$age_of_first_mh_nade_no_sa) & time.df$age_of_first_mh_nade_no_sa <= time.df$last_age) | !is.na(time.df$age_at_death)),1,0) # Adding in ADE information time.df$age_at_ade_date1 <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'age_at_ade_date1'] time.df$age_at_ade_date1 <- ifelse(!is.na(time.df$age_at_ade_date1) & time.df$age_at_ade_date1 > time.df$last_age,NA, time.df$age_at_ade_date1) time.df$month_at_ade_date1 <- ceiling((time.df$age_at_ade_date1 - time.df$age_at_t0)*12+1) time.df$ade1 <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'ade1'] time.df$ade1 <- ifelse(is.na(time.df$age_at_ade_date1),NA,time.df$ade1) time.df$age_at_ade_date2 <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'age_at_ade_date2'] time.df$age_at_ade_date2 <- ifelse(!is.na(time.df$age_at_ade_date2) & time.df$age_at_ade_date2 > time.df$last_age,NA, time.df$age_at_ade_date2) time.df$month_at_ade_date2 <- ceiling((time.df$age_at_ade_date2 - time.df$age_at_t0)*12+1) time.df$ade2 <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'ade2'] time.df$ade2 <- ifelse(is.na(time.df$age_at_ade_date2),NA,time.df$ade2) time.df$age_at_ade_date3 <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'age_at_ade_date3'] time.df$age_at_ade_date3 <- ifelse(!is.na(time.df$age_at_ade_date3) & time.df$age_at_ade_date3 > time.df$last_age,NA, time.df$age_at_ade_date3) time.df$month_at_ade_date3 <- ceiling((time.df$age_at_ade_date3 - time.df$age_at_t0)*12+1) time.df$ade3 <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'ade3'] time.df$ade3 <- ifelse(is.na(time.df$age_at_ade_date3),NA,time.df$ade3) time.df$age_at_ade_date4 <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'age_at_ade_date4'] time.df$age_at_ade_date4 <- ifelse(!is.na(time.df$age_at_ade_date1) & time.df$age_at_ade_date4 > time.df$last_age,NA, time.df$age_at_ade_date4) time.df$month_at_ade_date4 <- ceiling((time.df$age_at_ade_date4 - time.df$age_at_t0)*12+1) time.df$ade4 <- data[match(time.df$cfar_pid, data$cfar_pid, nomatch=NA),'ade4'] time.df$ade4 <- ifelse(is.na(time.df$age_at_ade_date4),NA,time.df$ade4) time.df$ade_indicator <- ifelse(time.df$month_at_ade_date1 == time.df$month & !is.na(time.df$month_at_ade_date1),1, ifelse(time.df$month_at_ade_date2 == time.df$month & !is.na(time.df$month_at_ade_date2),1, ifelse(time.df$month_at_ade_date3 == time.df$month & !is.na(time.df$month_at_ade_date3),1, ifelse(time.df$month_at_ade_date4 == time.df$month & !is.na(time.df$month_at_ade_date4),1,0)))) # Cumulative HC use # Need to split up by HCP use to get cumulative sum just within that episode of HCP use first. time.df$hcp_number <- NA split(time.df$hcp_number, time.df$cfar_pid) <- lapply(split(subset(time.df,select=c(hcp_date_type_modified,cfar_pid)), time.df$cfar_pid), function(x){ cumsum(x[,1] %in% c('START','END,START','BOTH')) }) time.df$hcp_number <- ifelse(time.df$hcp_ind == 0,NA,time.df$hcp_number) time.df$pid.hcp <- paste(time.df$cfar_pid, time.df$hcp_number, sep='.') sub_time.df <- subset(time.df, hcp_ind == 1, select=c(cfar_pid,age, pid.hcp)) sub_time.df$diff_by_hcp <- NA split(sub_time.df$diff_by_hcp, sub_time.df$pid.hcp) <- lapply(split(sub_time.df$age, sub_time.df$pid.hcp), FUN=function(y){ if(length(unlist(y)) == 1){ return(1/12) } else { return(c(diff(unlist(y)),0)) return(c(diff(unlist(y)),1/12)) } }) sub_time.df$pid.age <- paste(sub_time.df$cfar_pid, sub_time.df$age,sep='.') time.df$pid.age <- paste(time.df$cfar_pid, time.df$age, sep='.') time.df$diff_by_hcp <- sub_time.df[match(time.df$pid.age,sub_time.df$pid.age,nomatch=NA),'diff_by_hcp'] sub_time.df$diff_by_hcp_modified <- NA split(sub_time.df$diff_by_hcp_modified, sub_time.df$pid.hcp) <- lapply(split(sub_time.df$age, sub_time.df$pid.hcp), FUN=function(y){ if(length(unlist(y)) == 1){ return(1/12) } else { return(c(0,diff(unlist(y)))) } }) time.df$diff_by_hcp_modified <- sub_time.df[match(time.df$pid.age, sub_time.df$pid.age,nomatch=NA),'diff_by_hcp_modified'] split(time.df$diff_by_hcp_modified, time.df$cfar_pid) <- lapply(split(subset(time.df, select=c(cfar_pid,diff_by_hcp_modified,hcp_ind)),time.df$cfar_pid),FUN=function(y){ new_diff <- y[,'diff_by_hcp_modified'] if(nrow(y) > 1){ for(i in 2:nrow(y)){ if(y[(i-1),'hcp_ind'] == 1 & y[i,'hcp_ind'] == 0){ new_diff[i] <- 1/12 } } } return(new_diff) }) sub_time.df$diff_by_hcp_modified <- time.df[match(sub_time.df$pid.age, time.df$pid.age,nomatch=NA),'diff_by_hcp_modified'] # Then will sum up the individual cumulative sums to get an overall sum time.df$cum_hcp <- NA sub_time.df$cum_hcp <- NA split(sub_time.df$cum_hcp, sub_time.df$cfar_pid) <- lapply(split(sub_time.df$diff_by_hcp, sub_time.df$cfar_pid), FUN=function(y){ c(0,cumsum(y[-length(unlist(y))]))}) time.df$cum_hcp <- sub_time.df[match(time.df$pid.age, sub_time.df$pid.age, nomatch=NA),'cum_hcp'] # Then will sum up the individual cumulative sums to get an overall sum time.df$cum_hcp_modified <- NA sub_time.df$cum_hcp_modified <- NA split(time.df$cum_hcp_modified, time.df$cfar_pid) <- lapply(split(time.df$diff_by_hcp_modified, time.df$cfar_pid), FUN=function(y){ which.na <- which(is.na(y)) y[which.na] <- 0 c(cumsum(y))}) sub_time.df$cum_hcp_modified <- time.df[match(sub_time.df$pid.age, time.df$pid.age, nomatch=NA),'cum_hcp_modified'] # Then need to fill in missing values with last observation carried forward # Filling in CD4 counts time.df$new_cum_hcp <- unsplit(lapply(split(time.df$cum_hcp,time.df$cfar_pid), function(x){L <- !is.na(x);x[c(NA,which(L))[cumsum(L)+1]]}),time.df$cfar_pid) time.df$new_cum_hcp <- ifelse(is.na(time.df$new_cum_hcp),0,time.df$new_cum_hcp) # Also need to include a lagged hcp_indicator for weighting model -- probability of being on HC this month depends on # HC status last month lag_hcp_indicator <- lapply(split(time.df$hcp_ind, time.df$cfar_pid),FUN=function(y){ c(0,y[-length(y)])}) time.df$lag_hcp_indicator <- unlist(lag_hcp_indicator) # Finally, need logged baseline viral load time.df$log10_baseline_vl <- log10(time.df$baseline_vl_b) # Need to have a cumulative time not on HC for calculating incidence rates time.df$diff_time <- NA split(time.df$diff_time,time.df$cfar_pid) <- lapply(split(subset(time.df, select=c(age,hcp_ind)), time.df$cfar_pid),FUN=function(y){ tmp <- c(0,diff(unlist(y[,'age']))) tmp <- ifelse(y[,'hcp_ind'] == 1,0,tmp) return(tmp) } ) time.df$hcp_diff <- NA split(time.df$hcp_diff, time.df$cfar_pid) <- lapply(split(time.df$hcp_ind, time.df$cfar_pid),FUN=function(y){ tmp <- c(0,diff(unlist(y))) return(tmp) }) time.df$diff_time <- ifelse(time.df$hcp_diff == -1,0.0,time.df$diff_time) time.df$cum_no_hcp <- NA split(time.df$cum_no_hcp, time.df$cfar_pid) <- lapply(split(time.df$diff_time, time.df$cfar_pid),FUN=function(y){ tmp <- cumsum(y) return(tmp) }) # Need to have a cumulative time not on HC for calculating incidence rates time.df$diff_time_modified <- NA split(time.df$diff_time_modified,time.df$cfar_pid) <- lapply(split(subset(time.df, select=c(age,hcp_ind)), time.df$cfar_pid),FUN=function(y){ tmp <- c(0,diff(unlist(y[,'age']))) tmp <- ifelse(y[,'hcp_ind'] == 1,0,tmp) if(nrow(y) > 1){ for(i in 2:nrow(y)){ if(y[(i-1),'hcp_ind'] == 0 & y[i,'hcp_ind'] == 1){ tmp[i] <- 1/12 } if(y[(i-1),'hcp_ind'] == 1 & y[i,'hcp_ind'] == 0){ tmp[i] <- 0 } } } return(tmp) } ) time.df$cum_no_hcp_modified <- NA split(time.df$cum_no_hcp_modified, time.df$cfar_pid) <- lapply(split(time.df$diff_time_modified, time.df$cfar_pid),FUN=function(y){ tmp <- cumsum(y) return(tmp) }) # Calculating person time for different HC types (Oral versus DMPA) time.df$hcp_number_depo <- NA time.df$hcp_number_other <- NA split(time.df$hcp_number_depo, time.df$cfar_pid) <- lapply(split(subset(time.df, select=c(hcp_date_type_modified, hcp_title_depo,cfar_pid)), time.df$cfar_pid),FUN=function(y){ cumsum(y[,1] %in% c('START','END,START','BOTH') & y[,2] == 'Depo-Provera' & !is.na(y[,2]))}) split(time.df$hcp_number_other, time.df$cfar_pid) <- lapply(split(subset(time.df, select=c(hcp_date_type_modified, hcp_other_category,cfar_pid)), time.df$cfar_pid),FUN=function(y){ cumsum(y[,1] %in% c('START','END,START','BOTH') & y[,2] == 'Other' & !is.na(y[,2]))}) time.df$hcp_number_depo <- ifelse(time.df$hcp_ind == 0,NA,time.df$hcp_number_depo) time.df$hcp_number_other <- ifelse(time.df$hcp_ind == 0,NA,time.df$hcp_number_other) time.df$pid.hcp.depo <- paste(time.df$cfar_pid, time.df$hcp_number_depo, sep='.') time.df$pid.hcp.other <- paste(time.df$cfar_pid, time.df$hcp_number_other, sep='.') # Depo sub_time.df <- subset(time.df, hcp_ind == 1 & hcp_title_depo == 'Depo-Provera', select=c(cfar_pid,age, pid.hcp.depo)) sub_time.df$diff_by_depo <- NA split(sub_time.df$diff_by_depo, sub_time.df$pid.hcp.depo) <- lapply(split(sub_time.df$age, sub_time.df$pid.hcp.depo), FUN=function(y){ if(length(unlist(y)) == 1){ return(1/12) } else { return(c(diff(unlist(y)),0)) } }) sub_time.df$pid.age <- paste(sub_time.df$cfar_pid, sub_time.df$age,sep='.') time.df$pid.age <- paste(time.df$cfar_pid, time.df$age, sep='.') time.df$diff_by_depo <- sub_time.df[match(time.df$pid.age,sub_time.df$pid.age,nomatch=NA),'diff_by_depo'] time.df$diff_by_depo <- ifelse(is.na(time.df$diff_by_depo),0,time.df$diff_by_depo) sub_time.df$diff_by_depo <- ifelse(is.na(sub_time.df$diff_by_depo),0,sub_time.df$diff_by_depo) # Then will sum up the individual cumulative sums to get an overall sum time.df$cum_depo <- NA # sub_time.df$cum_depo <- NA split(time.df$cum_depo, time.df$cfar_pid) <- lapply(split(time.df$diff_by_depo, time.df$cfar_pid), FUN=function(y){ c(0,cumsum(y[-length(y)]))}) # Then need to fill in missing values with last observation carried forward # Filling in CD4 counts time.df$new_cum_depo <- unsplit(lapply(split(time.df$cum_depo,time.df$cfar_pid), function(x){L <- !is.na(x);x[c(NA,which(L))[cumsum(L)+1]]}),time.df$cfar_pid) time.df$new_cum_depo <- ifelse(is.na(time.df$new_cum_depo),0,time.df$new_cum_depo) # Other sub_time.df <- subset(time.df, hcp_ind == 1 & hcp_other_category == 'Other', select=c(cfar_pid,age, pid.hcp.other)) sub_time.df$diff_by_other <- NA split(sub_time.df$diff_by_other, sub_time.df$pid.hcp.other) <- lapply(split(sub_time.df$age, sub_time.df$pid.hcp.other), FUN=function(y){ if(length(unlist(y)) == 1){ return(1/12) } else { return(c(diff(unlist(y)),0)) } }) sub_time.df$pid.age <- paste(sub_time.df$cfar_pid, sub_time.df$age,sep='.') time.df$pid.age <- paste(time.df$cfar_pid, time.df$age, sep='.') time.df$diff_by_other <- sub_time.df[match(time.df$pid.age,sub_time.df$pid.age,nomatch=NA),'diff_by_other'] time.df$diff_by_other <- ifelse(is.na(time.df$diff_by_other),0,time.df$diff_by_other) sub_time.df$diff_by_other <- ifelse(is.na(sub_time.df$diff_by_other),0,sub_time.df$diff_by_other) # Then will sum up the individual cumulative sums to get an overall sum time.df$cum_other <- NA sub_time.df$cum_other <- NA split(time.df$cum_other, time.df$cfar_pid) <- lapply(split(time.df$diff_by_other, time.df$cfar_pid), FUN=function(y){ c(0,cumsum(unlist(y[-length(unlist(y))])))}) # Then need to fill in missing values with last observation carried forward # Filling in CD4 counts time.df$new_cum_other <- unsplit(lapply(split(time.df$cum_other,time.df$cfar_pid), function(x){L <- !is.na(x);x[c(NA,which(L))[cumsum(L)+1]]}),time.df$cfar_pid) time.df$new_cum_other <- ifelse(is.na(time.df$new_cum_other),0,time.df$new_cum_other) # Finding the max new_cum_depo and max of new_cum_oral per person time.df$max_cum_depo <- NA split(time.df$max_cum_depo, time.df$cfar_pid) <- lapply(split(time.df$cum_depo, time.df$cfar_pid), FUN=function(y){ max(y,na.rm=TRUE)}) time.df$max_cum_other <- NA split(time.df$max_cum_other, time.df$cfar_pid) <- lapply(split(time.df$cum_other, time.df$cfar_pid), FUN=function(y){ max(y,na.rm=TRUE)}) # Finding total person time for each type of HC (Depo vs oral) total_depo_time <- sum(time.df$max_cum_depo[!duplicated(time.df$cfar_pid)],na.rm=TRUE) total_other_time <- sum(time.df$max_cum_other[!duplicated(time.df$cfar_pid)],na.rm=TRUE) # Table 1 summaries # Need to create a variable for number of months followed, maximum cumulative HC use per person, ADE indicator (max of ade_indicator), # and indicator for who was ever on HAART and ART during follow-up time.df$max_number_months_followed <- NA split(time.df$max_number_months_followed, time.df$cfar_pid) <- lapply(split(time.df$month, time.df$cfar_pid), FUN=function(y){ max(y)}) time.df$max_cum_HC_use <- NA split(time.df$max_cum_HC_use, time.df$cfar_pid) <- lapply(split(time.df$new_cum_hcp, time.df$cfar_pid), FUN=function(y){ max(y)}) time.df$any_ade_during_followup <- NA split(time.df$any_ade_during_followup, time.df$cfar_pid) <- lapply(split(time.df$ade_indicator, time.df$cfar_pid), FUN=function(y){ max(y)}) time.df$any_ade_during_followup.factor <- factor(time.df$any_ade_during_followup, levels=c(0,1), labels=c('No','Yes')) time.df$any_haart_during_followup <- NA split(time.df$any_haart_during_followup, time.df$cfar_pid) <- lapply(split(time.df$haart_ind, time.df$cfar_pid), FUN=function(y){ max(y)}) time.df$any_haart_during_followup.factor <- factor(time.df$any_haart_during_followup, levels=c(0,1), labels=c('No','Yes')) time.df$any_art_during_followup <- NA split(time.df$any_art_during_followup, time.df$cfar_pid) <- lapply(split(time.df$reg_indicator, time.df$cfar_pid), FUN=function(y){ max(y)}) time.df$any_art_during_followup.factor <- factor(time.df$any_art_during_followup, levels=c(0,1), labels=c('No','Yes')) time.df$ade_indicator.factor <- factor(time.df$ade_indicator, levels=c(0,1), labels=c('No','Yes')) time.df$prior_hcp_t0.factor <- factor(time.df$prior_hcp_t0, levels=c(0,1), labels=c('No','Yes')) time.df$prior_hcv_t0.factor <- factor(time.df$prior_hcv_t0, levels=c(0,1), labels=c('No','Yes')) any_hc_use <- tapply(time.df$hcp_ind, INDEX=time.df$cfar_pid, FUN=max) any_hc_use.df <- data.frame('cfar_pid'=names(any_hc_use), 'any_hc_use'=any_hc_use) time.df$any_hc_use <- any_hc_use.df[match(time.df$cfar_pid, any_hc_use.df$cfar_pid,nomatch=NA),'any_hc_use'] time.df$any_hc_use.factor <- factor(time.df$any_hc_use, levels=c(0,1), labels=c('No HC use','HC use')) # Need to include distribution of person time for the different types of HC in Table 1 time.df$prior_non_ade.factor <- factor(time.df$prior_non_ade, levels=c(0,1),labels=c('No','Yes')) time.df$prior_mh_non_ade_no_sa.factor <- factor(time.df$prior_mh_non_ade_no_sa, levels=c(0,1),labels=c('No','Yes')) time.df$prior_mh_non_ade.factor <- factor(time.df$prior_mh_non_ade, levels=c(0,1),labels=c('No','Yes')) time.df$prior_mh_non_ade_only.factor <- factor(time.df$prior_mh_non_ade_only, levels=c(0,1),labels=c('No','Yes')) tbl1.summ <- summary(any_hc_use.factor ~ race_2level.factor + route_of_infection_combined.factor + age_at_t0 + max_number_months_followed + max_cum_HC_use + year_of_t0 + baseline_cd4_count_b + log10_baseline_vl + prior_hcv_t0.factor + any_ade_during_followup.factor + prior_hcp_t0.factor + prior_non_ade.factor + prior_mh_non_ade_no_sa.factor + prior_mh_non_ade.factor + prior_mh_non_ade_only.factor + any_haart_during_followup.factor + any_art_during_followup.factor + max_cum_depo + max_cum_other, data=time.df,digits=2, subset=!duplicated(cfar_pid),method='reverse',overall=TRUE,test=TRUE) tbl1.summ$labels <- c('Race','Route of infection','Baseline age','Months of follow-up','HC use per person (years)', 'Year of first clinic visit','Baseline CD4','Baseline HIV1-RNA (log-transformed)', 'HCV prior to first clinic visit','ADE during followup','HC use prior to first clinic visit', 'Prior NADE','Prior NADE (excluding substance abuse)','Prior NADE (including substance abuse)', 'Prior mental health NADE','Any HAART during followup','Any ART during followup','DMPA HC use per person (years)', 'Non-DMPA HC use per person (years)') tmp <- subset(time.df, any_hc_use == 1) tmp$hcp_title_depo <- factor(tmp$hcp_title_depo) tmp$hcp_title_other <- factor(tmp$hcp_title_other) tmp$all_patients <- 1 tmp.summ <- summary(all_patients ~ tmp$hcp_title_depo + tmp$hcp_title_other, data=tmp, method='reverse') tmp.summ$labels <- c('Summary of Depo use','Summary of non-Depo use') # Function to run weighting and marginal structural models by_outcomes <- function(max_month, out, nade_var, time.df){ df <- subset(time.df, month <= max_month) df <- time.df[which(time.df$month <= time.df[[max_month]]),] fmla.denom <- as.formula(paste('hcp_ind.factor ~ age_at_t0 + rcs(month,4) + race_2level.factor + rcs(cd4_count_b,3) + (1-undetectable_vl)*log10_hiv1_rna_b + haart_ind + reg_indicator + route_of_infection_combined.factor + prior_hcv_t0.factor + year_of_t0 + ade_indicator + prior_hcp_t0 + ',nade_var,sep='')) wt.mod.denom <- glm(fmla.denom,data=df, subset=pregnancy_indicator == 0, family='binomial') denom <- exp(predict(wt.mod.denom))/(1+exp(predict(wt.mod.denom))) #plus time-invariant covariates from above fmla.num <- as.formula(paste('hcp_ind.factor ~ rcs(month,4) + age_at_t0 + race_2level.factor + route_of_infection_combined.factor + prior_hcv_t0.factor + year_of_t0 + prior_hcp_t0 + ',nade_var,sep='')) wt.mod.num <- glm(fmla.num,data=df, subset=pregnancy_indicator == 0 & !is.na(cd4_count_b) & !is.na(log10_hiv1_rna_b),family='binomial') num <- exp(predict(wt.mod.num))/(1+exp(predict(wt.mod.num))) wt <- ifelse(df$hcp_ind[which(df$pregnancy_indicator == 0 & !is.na(df$cd4_count_b) & !is.na(df$log10_hiv1_rna_b))] == 1, num/denom, (1-num)/(1-denom)) quant.wt <- quantile(wt, probs=c(0.025,0.975)) wt <- ifelse(wt < quant.wt[1], quant.wt[1], ifelse(wt > quant.wt[2],quant.wt[2],wt)) outcome <- eval(parse(text=paste('df$',out,sep='')))[which(df$pregnancy_indicator == 0 & !is.na(df$cd4_count_b) & !is.na(df$log10_hiv1_rna_b))] df$outcome <- NA df$outcome[which(df$pregnancy_indicator == 0)] <- outcome df$wt <- NA df$wt[which(df$pregnancy_indicator == 0)] <- wt fmla.main <- as.formula(paste('outcome ~ rcs(age_at_t0,3) + month + race_2level.factor + route_of_infection_combined.factor + hcp_ind.factor + new_cum_hcp + prior_hcv_t0.factor + year_of_t0 + baseline_cd4_count_b + log10_baseline_vl + prior_hcp_t0 + ',nade_var,sep='')) msm <- glm(fmla.main, data=df, weights=wt,family='quasibinomial',subset=pregnancy_indicator == 0) outcome2 <- eval(parse(text=paste('df$',out,sep='')))[which(df$pregnancy_indicator == 0 & !is.na(df$cd4_count_b) & !is.na(df$log10_hiv1_rna_b))] df$outcome2 <- NA df$outcome2[which(df$pregnancy_indicator == 0 & !is.na(df$cd4_count_b) & !is.na(df$log10_hiv1_rna_b))] <- outcome2 df$outcome2 <- factor(df$outcome2) dd <<- datadist(df) options(datadist='dd') fmla.main2 <- as.formula(paste('outcome2 ~ rcs(age_at_t0,3) + month + race_2level.factor + route_of_infection_combined.factor + hcp_ind.factor + new_cum_hcp + prior_hcv_t0.factor + year_of_t0 + + baseline_cd4_count_b + log10_baseline_vl + prior_hcp_t0 + ',nade_var,sep='')) msm2 <- lrm(fmla.main2, data=df, weights=wt, subset=pregnancy_indicator == 0,tol=-10) fmla.main3 <- as.formula(paste('outcome2 ~ rcs(age_at_t0,3) + month + race_2level.factor + hcp_ind.factor + new_cum_hcp + prior_hcv_t0.factor + route_of_infection_combined.factor + + baseline_cd4_count_b + log10_baseline_vl + prior_hcp_t0 + ',nade_var,sep='')) msm2b <- lrm(fmla.main3, data=df, weights=wt, subset=pregnancy_indicator == 0,tol=-10) return(list(denom=denom, num=num, wt=wt, msm=msm, msm2=msm2, msm2b=msm2b)) } time.df <- upData(time.df, drop='month_of_nade') time.df$prior_hcv_t0.factor <- factor(time.df$prior_hcv_t0, levels=c(0,1), labels = c('No','Yes')) time.df$hcp_ind.factor <- factor(time.df$hcp_ind, levels=c(0,1), labels = c('No','Yes')) nade <- by_outcomes(max_month = 'max_month_for_nade_outcome', out = 'nade_outcome_indicator', nade_var = 'prior_non_ade.factor',time.df) nade.df <- subset(time.df, month <= max_month_for_nade_outcome) dd <- datadist(nade.df) options(datadist='dd') j <- nade[['msm2']] nade.summ <- summary(j, race_2level.factor = 'White', route_of_infection_combined.factor = 'Non-IDU', prior_hcv_t0.factor = 'No',hcp_ind.factor = 'No', year_of_t0 = c(2000, 2003),month = c(10,16), age_at_t0 = c(35,30),new_cum_hcp = c(0,1),baseline_cd4_count_b = c(500,600), log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_non_ade.factor = 'No') nade.summ2 <- summary(j, race_2level.factor = 'White', route_of_infection_combined.factor = 'Non-IDU', prior_hcv_t0.factor = 'No',hcp_ind.factor = 'No', year_of_t0 = c(2000, 2003),month = c(10,16), age_at_t0 = c(35,40),new_cum_hcp = c(0,1),baseline_cd4_count_b = c(500,600), log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_non_ade.factor = 'No') nade.summ3 <- summary(j, race_2level.factor = 'White', route_of_infection_combined.factor = 'Non-IDU', prior_hcv_t0.factor = 'No',hcp_ind.factor = 'No', year_of_t0 = c(2000, 2003),month = c(10,16), age_at_t0 = c(35,50),new_cum_hcp = c(0,1),baseline_cd4_count_b = c(500,600), log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_non_ade.factor = 'No') order.for.summ <- c(which(rownames(nade.summ) == 'age_at_t0'), which(rownames(nade.summ) == 'month'), which(rownames(nade.summ) == 'new_cum_hcp'), which(rownames(nade.summ) == 'year_of_t0'), which(rownames(nade.summ) == 'baseline_cd4_count_b'), which(rownames(nade.summ) == 'log10_baseline_vl'), which(rownames(nade.summ) == 'prior_hcp_t0'), which(rownames(nade.summ) == 'prior_non_ade.factor - Yes:No'), which(rownames(nade.summ) == 'race_2level.factor - Non-white:White'), which(rownames(nade.summ) == 'route_of_infection_combined.factor - IDU:Non-IDU'), which(rownames(nade.summ) == 'hcp_ind.factor - Yes:No'), which(rownames(nade.summ) == 'prior_hcv_t0.factor - Yes:No')) order.for.summ <- order.for.summ + 1 nade_or <- format(round(nade.summ[order.for.summ,'Effect'],2),nsmall=2) nade_or2 <- format(round(nade.summ2[2,'Effect'],2),nsmall=2) nade_or3 <- format(round(nade.summ3[2,'Effect'],2),nsmall=2) nade_ci <- paste('(',format(round(nade.summ[order.for.summ,'Lower 0.95'],2),nsmall=2), ', ',format(round(nade.summ[order.for.summ,'Upper 0.95'],2),nsmall=2),')',sep='') nade_ci2 <- paste('(', format(round(nade.summ2[2,'Lower 0.95'],2),nsmall=2), ', ',format(round(nade.summ2[2,'Upper 0.95'],2),nsmall=2), ')',sep='') nade_ci3 <- paste('(', format(round(nade.summ3[2,'Lower 0.95'],2),nsmall=2), ', ',format(round(nade.summ3[2,'Upper 0.95'],2),nsmall=2), ')',sep='') rownames.anova <- rownames(anova(nade[['msm2']])) order.for.anova <- c(which(rownames.anova == 'age_at_t0'), which(rownames.anova == 'month'), which(rownames.anova == 'new_cum_hcp'), which(rownames.anova == 'year_of_t0'), which(rownames.anova == 'baseline_cd4_count_b'), which(rownames.anova == 'log10_baseline_vl'), which(rownames.anova == 'prior_hcp_t0'), which(rownames.anova == 'prior_non_ade.factor'), which(rownames.anova == 'race_2level.factor'), which(rownames.anova == 'route_of_infection_combined.factor'), which(rownames.anova == 'hcp_ind.factor'), which(rownames.anova == 'prior_hcv_t0.factor')) nade_p <- c(anova(nade[['msm2']])[order.for.anova,'P']) nade_p <- ifelse(nade_p < 0.001, '< 0.001', ifelse(nade_p >= 0.001 & nade_p < 0.01, format(round(nade_p,3),nsmall=2), ifelse(nade_p >= 0.01, format(round(nade_p,2),nsmall=2),''))) nade_p <- ifelse(is.na(nade_p),'', nade_p) time.df <- upData(time.df, drop='month_of_mh_nade') mh_nade.mod <- by_outcomes(max_month = 'max_month_for_mh_nade_outcome', out = 'mh_nade_outcome_indicator', nade_var = 'prior_mh_non_ade.factor', time.df) mh_nade.df <- subset(time.df, month <= max_month_for_mh_nade_outcome) dd <- datadist(mh_nade.df) options(datadist = 'dd') j <- mh_nade.mod[['msm2']] mh_nade.summ <- summary(j, race_2level.factor = 'White', route_of_infection_combined = 'Non-IDU', prior_hcv_t0.factor = 'No',hcp_ind.factor = 'No', year_of_t0 = c(2000, 2003),month = c(10,16), age_at_t0 = c(35,30),new_cum_hcp = c(0,1),baseline_cd4_count_b = c(500,600), log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade.factor = 'No') mh_nade.summ2 <- summary(j, race_2level.factor = 'White', route_of_infection_combined = 'Non-IDU', prior_hcv_t0.factor = 'No',hcp_ind.factor = 'No', year_of_t0 = c(2000, 2003),month = c(10,16), age_at_t0 = c(35,40),new_cum_hcp = c(0,1),baseline_cd4_count_b = c(500,600), log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade.factor = 'No') mh_nade.summ3 <- summary(j, race_2level.factor = 'White', route_of_infection_combined = 'Non-IDU', prior_hcv_t0.factor = 'No',hcp_ind.factor = 'No', year_of_t0 = c(2000, 2003),month = c(10,16), age_at_t0 = c(35,50),new_cum_hcp = c(0,1),baseline_cd4_count_b = c(500,600), log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade.factor = 'No') order.for.summ <- c(which(rownames(mh_nade.summ) == 'age_at_t0'), which(rownames(mh_nade.summ) == 'month'), which(rownames(mh_nade.summ) == 'new_cum_hcp'), which(rownames(mh_nade.summ) == 'year_of_t0'), which(rownames(mh_nade.summ) == 'baseline_cd4_count_b'), which(rownames(mh_nade.summ) == 'log10_baseline_vl'), which(rownames(mh_nade.summ) == 'prior_hcp_t0'), which(rownames(mh_nade.summ) == 'prior_non_ade.factor - Yes:No'), which(rownames(mh_nade.summ) == 'race_2level.factor - Non-white:White'), which(rownames(mh_nade.summ) == 'route_of_infection_combined.factor - IDU:Non-IDU'), which(rownames(mh_nade.summ) == 'hcp_ind.factor - Yes:No'), which(rownames(mh_nade.summ) == 'prior_hcv_t0.factor - Yes:No')) order.for.summ <- order.for.summ + 1 mh_nade_or <- format(round(mh_nade.summ[order.for.summ,'Effect'],2),nsmall=2) mh_nade_or2 <- format(round(mh_nade.summ2[2,'Effect'],2),nsmall=2) mh_nade_or3 <- format(round(mh_nade.summ3[2,'Effect'],2),nsmall=2) mh_nade_ci <- paste('(',format(round(mh_nade.summ[order.for.summ,'Lower 0.95'],2),nsmall=2), ', ',format(round(mh_nade.summ[order.for.summ,'Upper 0.95'],2),nsmall=2),')',sep='') mh_nade_ci2 <- paste('(', format(round(mh_nade.summ2[2,'Lower 0.95'],2),nsmall=2), ', ',format(round(mh_nade.summ2[2,'Upper 0.95'],2),nsmall=2), ')',sep='') mh_nade_ci3 <- paste('(', format(round(mh_nade.summ3[2,'Lower 0.95'],2),nsmall=2), ', ',format(round(mh_nade.summ3[2,'Upper 0.95'],2),nsmall=2), ')',sep='') rownames.anova <- rownames(anova(mh_nade.mod[['msm2']])) order.for.anova <- c(which(rownames.anova == 'age_at_t0'), which(rownames.anova == 'month'), which(rownames.anova == 'new_cum_hcp'), which(rownames.anova == 'year_of_t0'), which(rownames.anova == 'baseline_cd4_count_b'), which(rownames.anova == 'log10_baseline_vl'), which(rownames.anova == 'prior_hcp_t0'), which(rownames.anova == 'prior_mh_non_ade.factor'), which(rownames.anova == 'race_2level.factor'), which(rownames.anova == 'route_of_infection_combined.factor'), which(rownames.anova == 'hcp_ind.factor'), which(rownames.anova == 'prior_hcv_t0.factor')) mh_nade_p <- c(anova(mh_nade.mod[['msm2']])[order.for.anova,'P']) mh_nade_p <- ifelse(mh_nade_p < 0.001, '< 0.001', ifelse(mh_nade_p >= 0.001 & mh_nade_p < 0.01, format(round(mh_nade_p,3),nsmall=2), ifelse(mh_nade_p >= 0.01, format(round(mh_nade_p,2),nsmall=2),''))) mh_nade_p <- ifelse(is.na(mh_nade_p),'', mh_nade_p) mh_nade_no_sa.mod <- by_outcomes(max_month = 'max_month_for_mh_nade_no_sa_outcome', out = 'mh_nade_no_sa_outcome_indicator', nade_var = 'prior_mh_non_ade_no_sa.factor', time.df) mh_nade_no_sa.df <- subset(time.df, month <= max_month_for_mh_nade_no_sa_outcome) dd <- datadist(mh_nade_no_sa.df) options(datadist = 'dd') j <- mh_nade_no_sa.mod[['msm2']] mh_nade_no_sa.summ <- summary(j, race_2level.factor = 'White', route_of_infection_combined = 'Non-IDU', prior_hcv_t0.factor = 'No',hcp_ind.factor = 'No', year_of_t0 = c(2000, 2003),month = c(10,16), age_at_t0 = c(35,30),new_cum_hcp = c(0,1),baseline_cd4_count_b = c(500,600), log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade_no_sa.factor = 'No') mh_nade_no_sa.summ2 <- summary(j, race_2level.factor = 'White', route_of_infection_combined = 'Non-IDU', prior_hcv_t0.factor = 'No',hcp_ind.factor = 'No', year_of_t0 = c(2000, 2003),month = c(10,16), age_at_t0 = c(35,40),new_cum_hcp = c(0,1),baseline_cd4_count_b = c(500,600), log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade_no_sa.factor = 'No') mh_nade_no_sa.summ3 <- summary(j, race_2level.factor = 'White', route_of_infection_combined = 'Non-IDU', prior_hcv_t0.factor = 'No',hcp_ind.factor = 'No', year_of_t0 = c(2000, 2003),month = c(10,16), age_at_t0 = c(35,50),new_cum_hcp = c(0,1),baseline_cd4_count_b = c(500,600), log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade_no_sa.factor = 'No') order.for.summ <- c(which(rownames(mh_nade_no_sa.summ) == 'age_at_t0'), which(rownames(mh_nade_no_sa.summ) == 'month'), which(rownames(mh_nade_no_sa.summ) == 'new_cum_hcp'), which(rownames(mh_nade_no_sa.summ) == 'year_of_t0'), which(rownames(mh_nade_no_sa.summ) == 'baseline_cd4_count_b'), which(rownames(mh_nade_no_sa.summ) == 'log10_baseline_vl'), which(rownames(mh_nade_no_sa.summ) == 'prior_hcp_t0'), which(rownames(mh_nade_no_sa.summ) == 'prior_mh_non_ade_no_sa.factor - Yes:No'), which(rownames(mh_nade_no_sa.summ) == 'race_2level.factor - Non-white:White'), which(rownames(mh_nade_no_sa.summ) == 'route_of_infection_combined.factor - IDU:Non-IDU'), which(rownames(mh_nade_no_sa.summ) == 'hcp_ind.factor - Yes:No'), which(rownames(mh_nade_no_sa.summ) == 'prior_hcv_t0.factor - Yes:No')) order.for.summ <- order.for.summ + 1 mh_nade_no_sa_or <- format(round(mh_nade_no_sa.summ[order.for.summ,'Effect'],2),nsmall=2) mh_nade_no_sa_or2 <- format(round(mh_nade_no_sa.summ2[2,'Effect'],2),nsmall=2) mh_nade_no_sa_or3 <- format(round(mh_nade_no_sa.summ3[2,'Effect'],2),nsmall=2) mh_nade_no_sa_ci <- paste('(',format(round(mh_nade_no_sa.summ[order.for.summ,'Lower 0.95'],2),nsmall=2), ', ',format(round(mh_nade_no_sa.summ[order.for.summ,'Upper 0.95'],2),nsmall=2),')',sep='') mh_nade_no_sa_ci2 <- paste('(', format(round(mh_nade_no_sa.summ2[2,'Lower 0.95'],2),nsmall=2), ', ',format(round(mh_nade_no_sa.summ2[2,'Upper 0.95'],2),nsmall=2), ')',sep='') mh_nade_no_sa_ci3 <- paste('(', format(round(mh_nade_no_sa.summ3[2,'Lower 0.95'],2),nsmall=2), ', ',format(round(mh_nade_no_sa.summ3[2,'Upper 0.95'],2),nsmall=2), ')',sep='') rownames.anova <- rownames(anova(mh_nade_no_sa.mod[['msm2']])) order.for.anova <- c(which(rownames.anova == 'age_at_t0'), which(rownames.anova == 'month'), which(rownames.anova == 'new_cum_hcp'), which(rownames.anova == 'year_of_t0'), which(rownames.anova == 'baseline_cd4_count_b'), which(rownames.anova == 'log10_baseline_vl'), which(rownames.anova == 'prior_hcp_t0'), which(rownames.anova == 'prior_mh_non_ade_no_sa.factor'), which(rownames.anova == 'race_2level.factor'), which(rownames.anova == 'route_of_infection_combined.factor'), which(rownames.anova == 'hcp_ind.factor'), which(rownames.anova == 'prior_hcv_t0.factor')) mh_nade_no_sa_p <- c(anova(mh_nade_no_sa.mod[['msm2']])[order.for.anova,'P']) mh_nade_no_sa_p <- ifelse(mh_nade_no_sa_p < 0.001, '< 0.001', ifelse(mh_nade_no_sa_p >= 0.001 & mh_nade_no_sa_p < 0.01, format(round(mh_nade_no_sa_p,3),nsmall=2), ifelse(mh_nade_no_sa_p >= 0.01, format(round(mh_nade_no_sa_p,2),nsmall=2),''))) mh_nade_no_sa_p <- ifelse(is.na(mh_nade_no_sa_p),'', mh_nade_no_sa_p) #---------------------------------------------# mh_nade_only.mod <- by_outcomes(max_month = 'max_month_for_mh_nade_only_outcome', out = 'mh_nade_only_outcome_indicator', nade_var = 'prior_mh_non_ade_only.factor', time.df) mh_nade_only.df <- subset(time.df, month <= max_month_for_mh_nade_only_outcome) dd <- datadist(mh_nade_only.df) options(datadist = 'dd') j <- mh_nade_only.mod[['msm2']] mh_nade_only.summ <- summary(j, race_2level.factor = 'White', route_of_infection_combined = 'Non-IDU', prior_hcv_t0.factor = 'No',hcp_ind.factor = 'No', year_of_t0 = c(2000, 2003),month = c(10,16), age_at_t0 = c(35,30),new_cum_hcp = c(0,1),baseline_cd4_count_b = c(500,600), log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade_only.factor = 'No') mh_nade_only.summ2 <- summary(j, race_2level.factor = 'White', route_of_infection_combined = 'Non-IDU', prior_hcv_t0.factor = 'No',hcp_ind.factor = 'No', year_of_t0 = c(2000, 2003),month = c(10,16), age_at_t0 = c(35,40),new_cum_hcp = c(0,1),baseline_cd4_count_b = c(500,600), log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade_only.factor = 'No') mh_nade_only.summ3 <- summary(j, race_2level.factor = 'White', route_of_infection_combined = 'Non-IDU', prior_hcv_t0.factor = 'No',hcp_ind.factor = 'No', year_of_t0 = c(2000, 2003),month = c(10,16), age_at_t0 = c(35,50),new_cum_hcp = c(0,1),baseline_cd4_count_b = c(500,600), log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade_only.factor = 'No') order.for.summ <- c(which(rownames(mh_nade_only.summ) == 'age_at_t0'), which(rownames(mh_nade_only.summ) == 'month'), which(rownames(mh_nade_only.summ) == 'new_cum_hcp'), which(rownames(mh_nade_only.summ) == 'year_of_t0'), which(rownames(mh_nade_only.summ) == 'baseline_cd4_count_b'), which(rownames(mh_nade_only.summ) == 'log10_baseline_vl'), which(rownames(mh_nade_only.summ) == 'prior_hcp_t0'), which(rownames(mh_nade_only.summ) == 'prior_mh_non_ade_only.factor - Yes:No'), which(rownames(mh_nade_only.summ) == 'race_2level.factor - Non-white:White'), which(rownames(mh_nade_only.summ) == 'route_of_infection_combined.factor - IDU:Non-IDU'), which(rownames(mh_nade_only.summ) == 'hcp_ind.factor - Yes:No'), which(rownames(mh_nade_only.summ) == 'prior_hcv_t0.factor - Yes:No')) order.for.summ <- order.for.summ + 1 mh_nade_only_or <- format(round(mh_nade_only.summ[order.for.summ,'Effect'],2),nsmall=2) mh_nade_only_or2 <- format(round(mh_nade_only.summ2[2,'Effect'],2),nsmall=2) mh_nade_only_or3 <- format(round(mh_nade_only.summ3[2,'Effect'],2),nsmall=2) mh_nade_only_ci <- paste('(',format(round(mh_nade_only.summ[order.for.summ,'Lower 0.95'],2),nsmall=2), ', ',format(round(mh_nade_only.summ[order.for.summ,'Upper 0.95'],2),nsmall=2),')',sep='') mh_nade_only_ci2 <- paste('(', format(round(mh_nade_only.summ2[2,'Lower 0.95'],2),nsmall=2), ', ',format(round(mh_nade_only.summ2[2,'Upper 0.95'],2),nsmall=2), ')',sep='') mh_nade_only_ci3 <- paste('(', format(round(mh_nade_only.summ3[2,'Lower 0.95'],2),nsmall=2), ', ',format(round(mh_nade_only.summ3[2,'Upper 0.95'],2),nsmall=2), ')',sep='') rownames.anova <- rownames(anova(mh_nade_only.mod[['msm2']])) order.for.anova <- c(which(rownames.anova == 'age_at_t0'), which(rownames.anova == 'month'), which(rownames.anova == 'new_cum_hcp'), which(rownames.anova == 'year_of_t0'), which(rownames.anova == 'baseline_cd4_count_b'), which(rownames.anova == 'log10_baseline_vl'), which(rownames.anova == 'prior_hcp_t0'), which(rownames.anova == 'prior_mh_non_ade_only.factor'), which(rownames.anova == 'race_2level.factor'), which(rownames.anova == 'route_of_infection_combined.factor'), which(rownames.anova == 'hcp_ind.factor'), which(rownames.anova == 'prior_hcv_t0.factor')) mh_nade_only_p <- c(anova(mh_nade_only.mod[['msm2']])[order.for.anova,'P']) mh_nade_only_p <- ifelse(mh_nade_only_p < 0.001, '< 0.001', ifelse(mh_nade_only_p >= 0.001 & mh_nade_only_p < 0.01, format(round(mh_nade_only_p,3),nsmall=2), ifelse(mh_nade_only_p >= 0.01, format(round(mh_nade_only_p,2),nsmall=2),''))) mh_nade_only_p <- ifelse(is.na(mh_nade_only_p),'', mh_nade_only_p) #---------------------------------------------# nade_dth <- by_outcomes(max_month = 'max_month_for_nade_dth_outcome', out = 'nade_dth_outcome_indicator', nade_var = 'prior_non_ade.factor', time.df) nade_dth.df <- subset(time.df, month <= max_month_for_nade_dth_outcome) dd <- datadist(nade_dth.df) options(datadist = 'dd') j <- nade_dth[['msm2']] nade_dth.summ <- summary(j, race_2level.factor = 'White', route_of_infection_combined = 'Non-IDU', prior_hcv_t0.factor = 'No',hcp_ind.factor = 'No', year_of_t0 = c(2000, 2003),month=c(10,16), age_at_t0 = c(35,30),new_cum_hcp = c(0,1),baseline_cd4_count_b = c(500,600), log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_non_ade.factor = 'No') nade_dth.summ2 <- summary(j, race_2level.factor = 'White', route_of_infection_combined = 'Non-IDU', prior_hcv_t0.factor = 'No',hcp_ind.factor = 'No', year_of_t0 = c(2000, 2003),month=c(10,16), age_at_t0 = c(35,40),new_cum_hcp = c(0,1),baseline_cd4_count_b = c(500,600), log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_non_ade.factor = 'No') nade_dth.summ3 <- summary(j, race_2level.factor = 'White', route_of_infection_combined = 'Non-IDU', prior_hcv_t0.factor = 'No',hcp_ind.factor = 'No', year_of_t0 = c(2000, 2003),month=c(10,16), age_at_t0 = c(35,50),new_cum_hcp = c(0,1),baseline_cd4_count_b = c(500,600), log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_non_ade.factor = 'No') order.for.summ <- c(which(rownames(nade_dth.summ) == 'age_at_t0'), which(rownames(nade_dth.summ) == 'month'), which(rownames(nade_dth.summ) == 'new_cum_hcp'), which(rownames(nade_dth.summ) == 'year_of_t0'), which(rownames(nade_dth.summ) == 'baseline_cd4_count_b'), which(rownames(nade_dth.summ) == 'log10_baseline_vl'), which(rownames(nade_dth.summ) == 'prior_hcp_t0'), which(rownames(nade_dth.summ) == 'prior_non_ade.factor - Yes:No'), which(rownames(nade_dth.summ) == 'race_2level.factor - Non-white:White'), which(rownames(nade_dth.summ) == 'route_of_infection_combined.factor - IDU:Non-IDU'), which(rownames(nade_dth.summ) == 'hcp_ind.factor - Yes:No'), which(rownames(nade_dth.summ) == 'prior_hcv_t0.factor - Yes:No')) order.for.summ <- order.for.summ + 1 nade_dth_or <- format(round(nade_dth.summ[order.for.summ,'Effect'],2),nsmall=2) nade_dth_or2 <- format(round(nade_dth.summ2[2,'Effect'],2),nsmall=2) nade_dth_or3 <- format(round(nade_dth.summ3[2,'Effect'],2),nsmall=2) nade_dth_ci <- paste('(',format(round(nade_dth.summ[order.for.summ,'Lower 0.95'],2),nsmall=2), ', ',format(round(nade_dth.summ[order.for.summ,'Upper 0.95'],2),nsmall=2),')',sep='') nade_dth_ci2 <- paste('(', format(round(nade_dth.summ2[2,'Lower 0.95'],2),nsmall=2), ', ',format(round(nade_dth.summ2[2,'Upper 0.95'],2),nsmall=2), ')',sep='') nade_dth_ci3 <- paste('(', format(round(nade_dth.summ3[2,'Lower 0.95'],2),nsmall=2), ', ',format(round(nade_dth.summ3[2,'Upper 0.95'],2),nsmall=2), ')',sep='') rownames.anova <- rownames(anova(nade_dth[['msm2']])) order.for.anova <- c(which(rownames.anova == 'age_at_t0'), which(rownames.anova == 'month'), which(rownames.anova == 'new_cum_hcp'), which(rownames.anova == 'year_of_t0'), which(rownames.anova == 'baseline_cd4_count_b'), which(rownames.anova == 'log10_baseline_vl'), which(rownames.anova == 'prior_hcp_t0'), which(rownames.anova == 'prior_non_ade.factor'), which(rownames.anova == 'race_2level.factor'), which(rownames.anova == 'route_of_infection_combined.factor'), which(rownames.anova == 'hcp_ind.factor'), which(rownames.anova == 'prior_hcv_t0.factor')) nade_dth_p <- c(anova(nade_dth[['msm2']])[order.for.anova,'P']) nade_dth_p <- ifelse(nade_dth_p < 0.001, '< 0.001', ifelse(nade_dth_p >= 0.001 & nade_dth_p < 0.01, format(round(nade_dth_p,3),nsmall=2), ifelse(nade_dth_p >= 0.01, format(round(nade_dth_p,2),nsmall=2),''))) nade_dth_p <- ifelse(is.na(nade_dth_p),'', nade_dth_p) mh_nade_dth <- by_outcomes(max_month = 'max_month_for_mh_nade_dth_outcome', out = 'mh_nade_dth_outcome_indicator', nade_var = 'prior_mh_non_ade.factor', time.df) mh_nade_dth.df <- subset(time.df, month <= max_month_for_mh_nade_dth_outcome) dd <- datadist(mh_nade_dth.df) options(datadist = 'dd') j <- mh_nade_dth[['msm2']] mh_nade_dth.summ <- summary(j, race_2level.factor = 'White', route_of_infection_combined = 'Non-IDU', prior_hcv_t0.factor = 'No',hcp_ind.factor = 'No', year_of_t0 = c(2000, 2003),month=c(10,16), age_at_t0 = c(35,30),new_cum_hcp = c(0,1),baseline_cd4_count_b = c(500,600), log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade.factor = 'No') mh_nade_dth.summ2 <- summary(j, race_2level.factor = 'White', route_of_infection_combined = 'Non-IDU', prior_hcv_t0.factor = 'No',hcp_ind.factor = 'No', year_of_t0 = c(2000, 2003),month=c(10,16), age_at_t0 = c(35,40),new_cum_hcp = c(0,1),baseline_cd4_count_b = c(500,600), log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade.factor = 'No') mh_nade_dth.summ3 <- summary(j, race_2level.factor = 'White', route_of_infection_combined = 'Non-IDU', prior_hcv_t0.factor = 'No',hcp_ind.factor = 'No', year_of_t0 = c(2000, 2003),month=c(10,16), age_at_t0 = c(35,50),new_cum_hcp = c(0,1),baseline_cd4_count_b = c(500,600), log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade.factor = 'No') order.for.summ <- c(which(rownames(mh_nade_dth.summ) == 'age_at_t0'), which(rownames(mh_nade_dth.summ) == 'month'), which(rownames(mh_nade_dth.summ) == 'new_cum_hcp'), which(rownames(mh_nade_dth.summ) == 'year_of_t0'), which(rownames(mh_nade_dth.summ) == 'baseline_cd4_count_b'), which(rownames(mh_nade_dth.summ) == 'log10_baseline_vl'), which(rownames(mh_nade_dth.summ) == 'prior_hcp_t0'), which(rownames(mh_nade_dth.summ) == 'prior_mh_non_ade.factor - Yes:No'), which(rownames(mh_nade_dth.summ) == 'race_2level.factor - Non-white:White'), which(rownames(mh_nade_dth.summ) == 'route_of_infection_combined.factor - IDU:Non-IDU'), which(rownames(mh_nade_dth.summ) == 'hcp_ind.factor - Yes:No'), which(rownames(mh_nade_dth.summ) == 'prior_hcv_t0.factor - Yes:No')) order.for.summ <- order.for.summ + 1 mh_nade_dth_or <- format(round(mh_nade_dth.summ[order.for.summ,'Effect'],2),nsmall=2) mh_nade_dth_or2 <- format(round(mh_nade_dth.summ2[2,'Effect'],2),nsmall=2) mh_nade_dth_or3 <- format(round(mh_nade_dth.summ3[2,'Effect'],2),nsmall=2) mh_nade_dth_ci <- paste('(',format(round(mh_nade_dth.summ[order.for.summ,'Lower 0.95'],2),nsmall=2), ', ',format(round(mh_nade_dth.summ[order.for.summ,'Upper 0.95'],2),nsmall=2),')',sep='') mh_nade_dth_ci2 <- paste('(', format(round(mh_nade_dth.summ2[2,'Lower 0.95'],2),nsmall=2), ', ',format(round(mh_nade_dth.summ2[2,'Upper 0.95'],2),nsmall=2), ')',sep='') mh_nade_dth_ci3 <- paste('(', format(round(mh_nade_dth.summ3[2,'Lower 0.95'],2),nsmall=2), ', ',format(round(mh_nade_dth.summ3[2,'Upper 0.95'],2),nsmall=2), ')',sep='') rownames.anova <- rownames(anova(mh_nade_dth[['msm2']])) order.for.anova <- c(which(rownames.anova == 'age_at_t0'), which(rownames.anova == 'month'), which(rownames.anova == 'new_cum_hcp'), which(rownames.anova == 'year_of_t0'), which(rownames.anova == 'baseline_cd4_count_b'), which(rownames.anova == 'log10_baseline_vl'), which(rownames.anova == 'prior_hcp_t0'), which(rownames.anova == 'prior_mh_non_ade.factor'), which(rownames.anova == 'race_2level.factor'), which(rownames.anova == 'route_of_infection_combined.factor'), which(rownames.anova == 'hcp_ind.factor'), which(rownames.anova == 'prior_hcv_t0.factor')) mh_nade_dth_p <- c(anova(mh_nade_dth[['msm2']])[order.for.anova,'P']) mh_nade_dth_p <- ifelse(mh_nade_dth_p < 0.001, '< 0.001', ifelse(mh_nade_dth_p >= 0.001 & mh_nade_dth_p < 0.01, format(round(mh_nade_dth_p,3),nsmall=2), ifelse(mh_nade_dth_p >= 0.01, format(round(mh_nade_dth_p,2),nsmall=2),''))) mh_nade_dth_p <- ifelse(is.na(mh_nade_dth_p),'', mh_nade_dth_p) #----------------------------------------------------# mh_nade_only_dth <- by_outcomes(max_month = 'max_month_for_mh_nade_only_dth_outcome', out = 'mh_nade_only_dth_outcome_indicator', nade_var = 'prior_mh_non_ade_only.factor', time.df) mh_nade_only_dth.df <- subset(time.df, month <= max_month_for_mh_nade_only_dth_outcome) dd <- datadist(mh_nade_only_dth.df) options(datadist = 'dd') j <- mh_nade_only_dth[['msm2']] mh_nade_only_dth.summ <- summary(j, race_2level.factor = 'White', route_of_infection_combined = 'Non-IDU', prior_hcv_t0.factor = 'No',hcp_ind.factor = 'No', year_of_t0 = c(2000, 2003),month=c(10,16), age_at_t0 = c(35,30),new_cum_hcp = c(0,1),baseline_cd4_count_b = c(500,600), log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade_only.factor = 'No') mh_nade_only_dth.summ2 <- summary(j, race_2level.factor = 'White', route_of_infection_combined = 'Non-IDU', prior_hcv_t0.factor = 'No',hcp_ind.factor = 'No', year_of_t0 = c(2000, 2003),month=c(10,16), age_at_t0 = c(35,40),new_cum_hcp = c(0,1),baseline_cd4_count_b = c(500,600), log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade_only.factor = 'No') mh_nade_only_dth.summ3 <- summary(j, race_2level.factor = 'White', route_of_infection_combined = 'Non-IDU', prior_hcv_t0.factor = 'No',hcp_ind.factor = 'No', year_of_t0 = c(2000, 2003),month=c(10,16), age_at_t0 = c(35,50),new_cum_hcp = c(0,1),baseline_cd4_count_b = c(500,600), log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade_only.factor = 'No') order.for.summ <- c(which(rownames(mh_nade_only_dth.summ) == 'age_at_t0'), which(rownames(mh_nade_only_dth.summ) == 'month'), which(rownames(mh_nade_only_dth.summ) == 'new_cum_hcp'), which(rownames(mh_nade_only_dth.summ) == 'year_of_t0'), which(rownames(mh_nade_only_dth.summ) == 'baseline_cd4_count_b'), which(rownames(mh_nade_only_dth.summ) == 'log10_baseline_vl'), which(rownames(mh_nade_only_dth.summ) == 'prior_hcp_t0'), which(rownames(mh_nade_only_dth.summ) == 'prior_mh_non_ade_only.factor - Yes:No'), which(rownames(mh_nade_only_dth.summ) == 'race_2level.factor - Non-white:White'), which(rownames(mh_nade_only_dth.summ) == 'route_of_infection_combined.factor - IDU:Non-IDU'), which(rownames(mh_nade_only_dth.summ) == 'hcp_ind.factor - Yes:No'), which(rownames(mh_nade_only_dth.summ) == 'prior_hcv_t0.factor - Yes:No')) order.for.summ <- order.for.summ + 1 mh_nade_only_dth_or <- format(round(mh_nade_only_dth.summ[order.for.summ,'Effect'],2),nsmall=2) mh_nade_only_dth_or2 <- format(round(mh_nade_only_dth.summ2[2,'Effect'],2),nsmall=2) mh_nade_only_dth_or3 <- format(round(mh_nade_only_dth.summ3[2,'Effect'],2),nsmall=2) mh_nade_only_dth_ci <- paste('(',format(round(mh_nade_only_dth.summ[order.for.summ,'Lower 0.95'],2),nsmall=2), ', ',format(round(mh_nade_only_dth.summ[order.for.summ,'Upper 0.95'],2),nsmall=2),')',sep='') mh_nade_only_dth_ci2 <- paste('(', format(round(mh_nade_only_dth.summ2[2,'Lower 0.95'],2),nsmall=2), ', ',format(round(mh_nade_only_dth.summ2[2,'Upper 0.95'],2),nsmall=2), ')',sep='') mh_nade_only_dth_ci3 <- paste('(', format(round(mh_nade_only_dth.summ3[2,'Lower 0.95'],2),nsmall=2), ', ',format(round(mh_nade_only_dth.summ3[2,'Upper 0.95'],2),nsmall=2), ')',sep='') rownames.anova <- rownames(anova(mh_nade_only_dth[['msm2']])) order.for.anova <- c(which(rownames.anova == 'age_at_t0'), which(rownames.anova == 'month'), which(rownames.anova == 'new_cum_hcp'), which(rownames.anova == 'year_of_t0'), which(rownames.anova == 'baseline_cd4_count_b'), which(rownames.anova == 'log10_baseline_vl'), which(rownames.anova == 'prior_hcp_t0'), which(rownames.anova == 'prior_mh_non_ade_only.factor'), which(rownames.anova == 'race_2level.factor'), which(rownames.anova == 'route_of_infection_combined.factor'), which(rownames.anova == 'hcp_ind.factor'), which(rownames.anova == 'prior_hcv_t0.factor')) mh_nade_only_dth_p <- c(anova(mh_nade_only_dth[['msm2']])[order.for.anova,'P']) mh_nade_only_dth_p <- ifelse(mh_nade_only_dth_p < 0.001, '< 0.001', ifelse(mh_nade_only_dth_p >= 0.001 & mh_nade_only_dth_p < 0.01, format(round(mh_nade_only_dth_p,3),nsmall=2), ifelse(mh_nade_only_dth_p >= 0.01, format(round(mh_nade_only_dth_p,2),nsmall=2),''))) mh_nade_only_dth_p <- ifelse(is.na(mh_nade_only_dth_p),'', mh_nade_only_dth_p) mh_nade_no_sa_dth <- by_outcomes(max_month = 'max_month_for_mh_nade_no_sa_dth_outcome', out = 'mh_nade_no_sa_dth_outcome_indicator', nade_var = 'prior_mh_non_ade_no_sa.factor', time.df) mh_nade_no_sa_dth.df <- subset(time.df, month <= max_month_for_mh_nade_no_sa_dth_outcome) dd <- datadist(mh_nade_no_sa_dth.df) options(datadist = 'dd') j <- mh_nade_no_sa_dth[['msm2']] mh_nade_no_sa_dth.summ <- summary(j, race_2level.factor = 'White', route_of_infection_combined = 'Non-IDU', prior_hcv_t0.factor = 'No',hcp_ind.factor = 'No', year_of_t0 = c(2000, 2003),month=c(10,16), age_at_t0 = c(35,30),new_cum_hcp = c(0,1),baseline_cd4_count_b = c(500,600), log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade_no_sa.factor = 'No') mh_nade_no_sa_dth.summ2 <- summary(j, race_2level.factor = 'White', route_of_infection_combined = 'Non-IDU', prior_hcv_t0.factor = 'No',hcp_ind.factor = 'No', year_of_t0 = c(2000, 2003),month=c(10,16), age_at_t0 = c(35,40),new_cum_hcp = c(0,1),baseline_cd4_count_b = c(500,600), log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade_no_sa.factor = 'No') mh_nade_no_sa_dth.summ3 <- summary(j, race_2level.factor = 'White', route_of_infection_combined = 'Non-IDU', prior_hcv_t0.factor = 'No',hcp_ind.factor = 'No', year_of_t0 = c(2000, 2003),month=c(10,16), age_at_t0 = c(35,50),new_cum_hcp = c(0,1),baseline_cd4_count_b = c(500,600), log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade_no_sa.factor = 'No') order.for.summ <- c(which(rownames(mh_nade_no_sa_dth.summ) == 'age_at_t0'), which(rownames(mh_nade_no_sa_dth.summ) == 'month'), which(rownames(mh_nade_no_sa_dth.summ) == 'new_cum_hcp'), which(rownames(mh_nade_no_sa_dth.summ) == 'year_of_t0'), which(rownames(mh_nade_no_sa_dth.summ) == 'baseline_cd4_count_b'), which(rownames(mh_nade_no_sa_dth.summ) == 'log10_baseline_vl'), which(rownames(mh_nade_no_sa_dth.summ) == 'prior_hcp_t0'), which(rownames(mh_nade_no_sa_dth.summ) == 'prior_mh_non_ade_no_sa.factor - Yes:No'), which(rownames(mh_nade_no_sa_dth.summ) == 'race_2level.factor - Non-white:White'), which(rownames(mh_nade_no_sa_dth.summ) == 'route_of_infection_combined.factor - IDU:Non-IDU'), which(rownames(mh_nade_no_sa_dth.summ) == 'hcp_ind.factor - Yes:No'), which(rownames(mh_nade_no_sa_dth.summ) == 'prior_hcv_t0.factor - Yes:No')) order.for.summ <- order.for.summ + 1 mh_nade_no_sa_dth_or <- format(round(mh_nade_no_sa_dth.summ[order.for.summ,'Effect'],2),nsmall=2) mh_nade_no_sa_dth_or2 <- format(round(mh_nade_no_sa_dth.summ2[2,'Effect'],2),nsmall=2) mh_nade_no_sa_dth_or3 <- format(round(mh_nade_no_sa_dth.summ3[2,'Effect'],2),nsmall=2) mh_nade_no_sa_dth_ci <- paste('(',format(round(mh_nade_no_sa_dth.summ[order.for.summ,'Lower 0.95'],2),nsmall=2), ', ',format(round(mh_nade_no_sa_dth.summ[order.for.summ,'Upper 0.95'],2),nsmall=2),')',sep='') mh_nade_no_sa_dth_ci2 <- paste('(', format(round(mh_nade_no_sa_dth.summ2[2,'Lower 0.95'],2),nsmall=2), ', ',format(round(mh_nade_no_sa_dth.summ2[2,'Upper 0.95'],2),nsmall=2), ')',sep='') mh_nade_no_sa_dth_ci3 <- paste('(', format(round(mh_nade_no_sa_dth.summ3[2,'Lower 0.95'],2),nsmall=2), ', ',format(round(mh_nade_no_sa_dth.summ3[2,'Upper 0.95'],2),nsmall=2), ')',sep='') rownames.anova <- rownames(anova(mh_nade_no_sa_dth[['msm2']])) order.for.anova <- c(which(rownames.anova == 'age_at_t0'), which(rownames.anova == 'month'), which(rownames.anova == 'new_cum_hcp'), which(rownames.anova == 'year_of_t0'), which(rownames.anova == 'baseline_cd4_count_b'), which(rownames.anova == 'log10_baseline_vl'), which(rownames.anova == 'prior_hcp_t0'), which(rownames.anova == 'prior_mh_non_ade_no_sa.factor'), which(rownames.anova == 'race_2level.factor'), which(rownames.anova == 'route_of_infection_combined.factor'), which(rownames.anova == 'hcp_ind.factor'), which(rownames.anova == 'prior_hcv_t0.factor')) mh_nade_no_sa_dth_p <- c(anova(mh_nade_no_sa_dth[['msm2']])[order.for.anova,'P']) mh_nade_no_sa_dth_p <- ifelse(mh_nade_no_sa_dth_p < 0.001, '< 0.001', ifelse(mh_nade_no_sa_dth_p >= 0.001 & mh_nade_no_sa_dth_p < 0.01, format(round(mh_nade_no_sa_dth_p,3),nsmall=2), ifelse(mh_nade_no_sa_dth_p >= 0.01, format(round(mh_nade_no_sa_dth_p,2),nsmall=2),''))) mh_nade_no_sa_dth_p <- ifelse(is.na(mh_nade_no_sa_dth_p),'', mh_nade_no_sa_dth_p) death <- by_outcomes(max_month = 'max_month_for_death_outcome', out = 'death_outcome_indicator', nade_var = 'prior_non_ade.factor', time.df) death.df <- subset(time.df, month <= max_month_for_death_outcome) dd <- datadist(death.df) options(datadist = 'dd') dd <- datadist(death.df) options(datadist='dd') j <- death[['msm2']] death.summ <- summary(j, race_2level.factor = 'White', route_of_infection_combined = 'Non-IDU', prior_hcv_t0.factor = 'No',hcp_ind.factor = 'No', year_of_t0 = c(2000, 2003),month=c(10,16), age_at_t0 = c(50,55),new_cum_hcp = c(0,1),baseline_cd4_count_b = c(500,600), log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_non_ade.factor = 'No') death_or <- format(round(death.summ[which(rownames(death.summ) == ' Odds Ratio'),'Effect'],2),nsmall=2) death_ci <- paste('(', format(round(death.summ[which(rownames(death.summ) == ' Odds Ratio'),'Lower 0.95'],2),nsmall=2), ', ',format(round(death.summ[which(rownames(death.summ) == ' Odds Ratio'),'Upper 0.95'],2),nsmall=2), ')',sep='') rownames.anova <- rownames(anova(death[['msm2']])) order.for.anova <- c(which(rownames.anova == 'age_at_t0'), which(rownames.anova == 'month'), which(rownames.anova == 'new_cum_hcp'), which(rownames.anova == 'year_of_t0'), which(rownames.anova == 'baseline_cd4_count_b'), which(rownames.anova == 'log10_baseline_vl'), which(rownames.anova == 'prior_hcp_t0'), which(rownames.anova == 'prior_non_ade.factor'), which(rownames.anova == 'race_2level.factor'), which(rownames.anova == 'route_of_infection_combined.factor'), which(rownames.anova == 'hcp_ind.factor'), which(rownames.anova == 'prior_hcv_t0.factor')) death_p <- c(anova(death[['msm2']])[order.for.anova[1:7],'P'], NA, anova(death[['msm2']])[order.for.anova[8],'P'], NA, anova(death[['msm2']])[order.for.anova[9:10],'P']) death_p <- ifelse(death_p < 0.001, '< 0.001', ifelse(death_p >= 0.001 & death_p < 0.01, round(death_p,3), ifelse(death_p >= 0.01, round(death_p,2),''))) death_p <- ifelse(is.na(death_p),'', death_p) # Just need a simple model with death as the outcome df <- subset(time.df, month <= max_month_for_death_outcome) df <- time.df[which(time.df$month <= time.df[['max_month_for_death_outcome']]),] wt.mod.denom <- glm(hcp_ind.factor ~ age_at_t0 + rcs(month,4) + race_2level.factor + rcs(cd4_count_b,3) + (1-undetectable_vl)*log10_hiv1_rna_b + haart_ind + reg_indicator + route_of_infection_combined.factor + prior_hcv_t0.factor + year_of_t0 + ade_indicator + prior_hcp_t0 + prior_non_ade.factor, data=df, subset=pregnancy_indicator == 0, family='binomial') denom <- exp(predict(wt.mod.denom))/(1+exp(predict(wt.mod.denom))) #plus time-invariant covariates from above wt.mod.num <- glm(hcp_ind.factor ~ rcs(month,4), data=df, subset=pregnancy_indicator == 0 & !is.na(cd4_count_b) & !is.na(log10_hiv1_rna_b),family='binomial') num <- exp(predict(wt.mod.num))/(1+exp(predict(wt.mod.num))) wt <- ifelse(df$hcp_ind[which(df$pregnancy_indicator == 0 & !is.na(df$cd4_count_b) & !is.na(df$log10_hiv1_rna_b))] == 1, num/denom, (1-num)/(1-denom)) quant.wt <- quantile(wt, probs=c(0.025,0.975)) wt <- ifelse(wt < quant.wt[1],quant.wt[1], ifelse(wt > quant.wt[2],quant.wt[2],wt)) outcome <- df$death_outcome_indicator[which(df$pregnancy_indicator == 0 & !is.na(df$cd4_count_b) & !is.na(df$log10_hiv1_rna_b))] hcp_ind.factor <- df$hcp_ind.factor[which(df$pregnancy_indicator == 0 & !is.na(df$cd4_count_b) & !is.na(df$log10_hiv1_rna_b))] new_cum_hcp <- df$new_cum_hcp[which(df$pregnancy_indicator == 0 & !is.na(df$cd4_count_b) & !is.na(df$log10_hiv1_rna_b))] pregnancy_indicator <- df$pregnancy_indicator[which(df$pregnancy_indicator == 0 & !is.na(df$cd4_count_b) & !is.na(df$log10_hiv1_rna_b))] msm <- glm(outcome ~ hcp_ind.factor + new_cum_hcp, weights=wt, family='quasibinomial',subset=pregnancy_indicator == 0) outcome2 <- factor(outcome) dd <<- datadist(df) options(datadist='dd') msm2 <- lrm(outcome2 ~ hcp_ind.factor + new_cum_hcp,weights=wt, subset=pregnancy_indicator == 0,tol=-10) death.simple.summ <- summary(msm2, hcp_ind.factor = 'No',new_cum_hcp = c(0,1)) death_simple_or <- format(round(death.simple.summ[which(rownames(death.simple.summ) == ' Odds Ratio'),'Effect'],2),nsmall=2) death_simple_ci <- paste('(', format(round(death.simple.summ[which(rownames(death.simple.summ) == ' Odds Ratio'), 'Lower 0.95'],2),nsmall=2), ', ',format(round(death.simple.summ[which(rownames(death.simple.summ) == ' Odds Ratio'), 'Upper 0.95'],2),nsmall=2),')',sep='') rownames.anova <- rownames(anova(msm2)) order.for.anova <- c(which(rownames.anova == 'new_cum_hcp'), which(rownames.anova == 'hcp_ind.factor')) death_simple_p <- c(anova(msm2)[order.for.anova[1:2],'P']) death_simple_p <- ifelse(death_simple_p < 0.001, '< 0.001', ifelse(death_simple_p >= 0.001 & death_simple_p < 0.01, round(death_simple_p,3), ifelse(death_simple_p >= 0.01, round(death_simple_p,2),''))) death_simple_p <- ifelse(is.na(death_simple_p),'', death_simple_p) by_outcomes_with_depo <- function(max_month, out, nade_var, time.df){ df <- subset(time.df, month <= max_month) df <- time.df[which(time.df$month <= time.df[[max_month]]),] fmla.denom <- as.formula(paste('depo_ind.factor ~ age_at_t0 + rcs(month,4) + race_2level.factor + rcs(cd4_count_b,3) + (1-undetectable_vl)*log10_hiv1_rna_b + haart_ind + reg_indicator + route_of_infection_combined.factor + prior_hcv_t0.factor + year_of_t0 + ade_indicator + prior_hcp_t0 + ',nade_var,sep='')) wt.mod.denom <- glm(fmla.denom,data=df, subset=pregnancy_indicator == 0, family='binomial') denom <- exp(predict(wt.mod.denom))/(1+exp(predict(wt.mod.denom))) #plus time-invariant covariates from above fmla.num <- as.formula(paste('depo_ind.factor ~ rcs(month,4) + age_at_t0 + race_2level.factor + route_of_infection_combined.factor + prior_hcv_t0.factor + year_of_t0 + prior_hcp_t0 + ',nade_var,sep='')) wt.mod.num <- glm(fmla.num,data=df, subset=pregnancy_indicator == 0 & !is.na(cd4_count_b) & !is.na(log10_hiv1_rna_b),family='binomial') num <- exp(predict(wt.mod.num))/(1+exp(predict(wt.mod.num))) wt <- ifelse(df$hcp_ind[which(df$pregnancy_indicator == 0 & !is.na(df$cd4_count_b) & !is.na(df$log10_hiv1_rna_b))] == 1, num/denom, (1-num)/(1-denom)) quant.wt <- quantile(wt, probs=c(0.025,0.975)) wt <- ifelse(wt < quant.wt[1], quant.wt[1], ifelse(wt > quant.wt[2],quant.wt[2],wt)) outcome <- eval(parse(text=paste('df$',out,sep='')))[which(df$pregnancy_indicator == 0 & !is.na(df$cd4_count_b) & !is.na(df$log10_hiv1_rna_b))] df$outcome <- NA df$outcome[which(df$pregnancy_indicator == 0)] <- outcome df$wt <- NA df$wt[which(df$pregnancy_indicator == 0)] <- wt fmla.main <- as.formula(paste('outcome ~ rcs(age_at_t0,3) + month + race_2level.factor + route_of_infection_combined.factor + depo_ind.factor + cum_depo + prior_hcv_t0.factor + year_of_t0 + baseline_cd4_count_b + log10_baseline_vl + prior_hcp_t0 + ',nade_var,sep='')) msm <- glm(fmla.main, data=df, weights=wt, family='quasibinomial',subset=pregnancy_indicator == 0) outcome2 <- eval(parse(text=paste('df$',out,sep='')))[which(df$pregnancy_indicator == 0 & !is.na(df$cd4_count_b) & !is.na(df$log10_hiv1_rna_b))] df$outcome2 <- NA df$outcome2[which(df$pregnancy_indicator == 0 & !is.na(df$cd4_count_b) & !is.na(df$log10_hiv1_rna_b))] <- outcome2 df$outcome2 <- factor(df$outcome2) dd <<- datadist(df) options(datadist='dd') fmla.main2 <- as.formula(paste('outcome2 ~ rcs(age_at_t0,3) + month + race_2level.factor + route_of_infection_combined.factor + depo_ind.factor + cum_depo + prior_hcv_t0.factor + year_of_t0 + + baseline_cd4_count_b + log10_baseline_vl + prior_hcp_t0 + ',nade_var,sep='')) msm2 <- lrm(fmla.main2, data=df, weights=wt, subset=pregnancy_indicator == 0,tol=-10) fmla.main3 <- as.formula(paste('outcome2 ~ rcs(age_at_t0,3) + month + race_2level.factor + depo_ind.factor + cum_depo + prior_hcv_t0.factor + route_of_infection_combined.factor + + baseline_cd4_count_b + log10_baseline_vl + prior_hcp_t0 + ',nade_var,sep='')) msm2b <- lrm(fmla.main3, data=df, weights=wt, subset=pregnancy_indicator == 0,tol=-10) return(list(denom=denom, num=num, wt=wt, msm=msm, msm2=msm2, msm2b=msm2b)) } time.df$depo_ind.factor <- factor(time.df$hcp_depo_ind, levels=c(0,1), labels = c('No','Yes')) nade.depo <- by_outcomes_with_depo(max_month = 'max_month_for_nade_outcome', out = 'nade_outcome_indicator', nade_var = 'prior_non_ade.factor',time.df) nade.depo.df <- subset(time.df, month <= max_month_for_nade_outcome) dd <- datadist(nade.depo.df) options(datadist='dd') j <- nade.depo[['msm2']] nade.depo.summ <- summary(j, race_2level.factor = 'White', route_of_infection_combined.factor = 'Non-IDU', prior_hcv_t0.factor = 'No',depo_ind.factor = 'No', year_of_t0 = c(2000, 2003),month = c(10,16), age_at_t0 = c(35,30),cum_depo = c(0,1),baseline_cd4_count_b = c(500,600), log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_non_ade.factor = 'No') nade.depo.summ2 <- summary(j, race_2level.factor = 'White', route_of_infection_combined.factor = 'Non-IDU', prior_hcv_t0.factor = 'No',depo_ind.factor = 'No', year_of_t0 = c(2000, 2003),month = c(10,16), age_at_t0 = c(35,40),cum_depo = c(0,1),baseline_cd4_count_b = c(500,600), log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_non_ade.factor = 'No') nade.depo.summ3 <- summary(j, race_2level.factor = 'White', route_of_infection_combined.factor = 'Non-IDU', prior_hcv_t0.factor = 'No',depo_ind.factor = 'No', year_of_t0 = c(2000, 2003),month = c(10,16), age_at_t0 = c(35,50),cum_depo = c(0,1),baseline_cd4_count_b = c(500,600), log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_non_ade.factor = 'No') order.for.summ <- c(which(rownames(nade.depo.summ) == 'age_at_t0'), which(rownames(nade.depo.summ) == 'month'), which(rownames(nade.depo.summ) == 'cum_depo'), which(rownames(nade.depo.summ) == 'year_of_t0'), which(rownames(nade.depo.summ) == 'baseline_cd4_count_b'), which(rownames(nade.depo.summ) == 'log10_baseline_vl'), which(rownames(nade.depo.summ) == 'prior_hcp_t0'), which(rownames(nade.depo.summ) == 'prior_non_ade.factor - Yes:No'), which(rownames(nade.depo.summ) == 'race_2level.factor - Non-white:White'), which(rownames(nade.depo.summ) == 'route_of_infection_combined.factor - IDU:Non-IDU'), which(rownames(nade.depo.summ) == 'depo_ind.factor - Yes:No'), which(rownames(nade.depo.summ) == 'prior_hcv_t0.factor - Yes:No')) order.for.summ <- order.for.summ + 1 nade_depo_or <- format(round(nade.depo.summ[order.for.summ,'Effect'],2),nsmall=2) nade_depo_or2 <- format(round(nade.depo.summ2[2,'Effect'],2),nsmall=2) nade_depo_or3 <- format(round(nade.depo.summ3[2,'Effect'],2),nsmall=2) nade_depo_ci <- paste('(',format(round(nade.depo.summ[order.for.summ,'Lower 0.95'],2),nsmall=2), ', ',format(round(nade.depo.summ[order.for.summ,'Upper 0.95'],2),nsmall=2),')',sep='') nade_depo_ci2 <- paste('(', format(round(nade.depo.summ2[2,'Lower 0.95'],2),nsmall=2), ', ',format(round(nade.depo.summ2[2,'Upper 0.95'],2),nsmall=2), ')',sep='') nade_depo_ci3 <- paste('(', format(round(nade.depo.summ3[2,'Lower 0.95'],2),nsmall=2), ', ',format(round(nade.depo.summ3[2,'Upper 0.95'],2),nsmall=2), ')',sep='') rownames.anova <- rownames(anova(nade.depo[['msm2']])) order.for.anova <- c(which(rownames.anova == 'age_at_t0'), which(rownames.anova == 'month'), which(rownames.anova == 'cum_depo'), which(rownames.anova == 'year_of_t0'), which(rownames.anova == 'baseline_cd4_count_b'), which(rownames.anova == 'log10_baseline_vl'), which(rownames.anova == 'prior_hcp_t0'), which(rownames.anova == 'prior_non_ade.factor'), which(rownames.anova == 'race_2level.factor'), which(rownames.anova == 'route_of_infection_combined.factor'), which(rownames.anova == 'depo_ind.factor'), which(rownames.anova == 'prior_hcv_t0.factor')) nade_depo_p <- c(anova(nade.depo[['msm2']])[order.for.anova,'P']) nade_depo_p <- ifelse(nade_depo_p < 0.001, '< 0.001', ifelse(nade_depo_p >= 0.001 & nade_depo_p < 0.01, format(round(nade_depo_p,3),nsmall=2), ifelse(nade_depo_p >= 0.01, format(round(nade_depo_p,2),nsmall=2),''))) nade_depo_p <- ifelse(is.na(nade_depo_p),'', nade_depo_p) mh_nade.depo.mod <- by_outcomes_with_depo(max_month = 'max_month_for_mh_nade_outcome', out = 'mh_nade_outcome_indicator', nade_var = 'prior_mh_non_ade.factor', time.df) mh_nade.depo.df <- subset(time.df, month <= max_month_for_mh_nade_outcome) dd <- datadist(mh_nade.depo.df) options(datadist = 'dd') dd <- datadist(mh_nade.depo.df) options(datadist='dd') j <- mh_nade.depo.mod[['msm2']] mh_nade.depo.summ <- summary(j, race_2level.factor = 'White', route_of_infection_combined = 'Non-IDU', prior_hcv_t0.factor = 'No',depo_ind.factor = 'No', year_of_t0 = c(2000, 2003),month = c(10,16), age_at_t0 = c(35,30),cum_depo = c(0,1),baseline_cd4_count_b = c(500,600), log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade.factor = 'No') mh_nade.depo.summ2 <- summary(j, race_2level.factor = 'White', route_of_infection_combined = 'Non-IDU', prior_hcv_t0.factor = 'No',depo_ind.factor = 'No', year_of_t0 = c(2000, 2003),month = c(10,16), age_at_t0 = c(35,40),cum_depo = c(0,1),baseline_cd4_count_b = c(500,600), log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade.factor = 'No') mh_nade.depo.summ3 <- summary(j, race_2level.factor = 'White', route_of_infection_combined = 'Non-IDU', prior_hcv_t0.factor = 'No',depo_ind.factor = 'No', year_of_t0 = c(2000, 2003),month = c(10,16), age_at_t0 = c(35,50),cum_depo = c(0,1),baseline_cd4_count_b = c(500,600), log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade.factor = 'No') order.for.summ <- c(which(rownames(mh_nade.depo.summ) == 'age_at_t0'), which(rownames(mh_nade.depo.summ) == 'month'), which(rownames(mh_nade.depo.summ) == 'cum_depo'), which(rownames(mh_nade.depo.summ) == 'year_of_t0'), which(rownames(mh_nade.depo.summ) == 'baseline_cd4_count_b'), which(rownames(mh_nade.depo.summ) == 'log10_baseline_vl'), which(rownames(mh_nade.depo.summ) == 'prior_hcp_t0'), which(rownames(mh_nade.depo.summ) == 'prior_mh_non_ade.factor - Yes:No'), which(rownames(mh_nade.depo.summ) == 'race_2level.factor - Non-white:White'), which(rownames(mh_nade.depo.summ) == 'route_of_infection_combined.factor - IDU:Non-IDU'), which(rownames(mh_nade.depo.summ) == 'depo_ind.factor - Yes:No'), which(rownames(mh_nade.depo.summ) == 'prior_hcv_t0.factor - Yes:No')) order.for.summ <- order.for.summ + 1 mh_nade_depo_or <- format(round(mh_nade.depo.summ[order.for.summ,'Effect'],2),nsmall=2) mh_nade_depo_or2 <- format(round(mh_nade.depo.summ2[2,'Effect'],2),nsmall=2) mh_nade_depo_or3 <- format(round(mh_nade.depo.summ3[2,'Effect'],2),nsmall=2) mh_nade_depo_ci <- paste('(',format(round(mh_nade.depo.summ[order.for.summ,'Lower 0.95'],2),nsmall=2), ', ',format(round(mh_nade.depo.summ[order.for.summ,'Upper 0.95'],2),nsmall=2),')',sep='') mh_nade_depo_ci2 <- paste('(', format(round(mh_nade.depo.summ2[2,'Lower 0.95'],2),nsmall=2), ', ',format(round(mh_nade.depo.summ2[2,'Upper 0.95'],2),nsmall=2), ')',sep='') mh_nade_depo_ci3 <- paste('(', format(round(mh_nade.depo.summ3[2,'Lower 0.95'],2),nsmall=2), ', ',format(round(mh_nade.depo.summ3[2,'Upper 0.95'],2),nsmall=2), ')',sep='') rownames.anova <- rownames(anova(mh_nade.depo.mod[['msm2']])) order.for.anova <- c(which(rownames.anova == 'age_at_t0'), which(rownames.anova == 'month'), which(rownames.anova == 'cum_depo'), which(rownames.anova == 'year_of_t0'), which(rownames.anova == 'baseline_cd4_count_b'), which(rownames.anova == 'log10_baseline_vl'), which(rownames.anova == 'prior_hcp_t0'), which(rownames.anova == 'prior_mh_non_ade.factor'), which(rownames.anova == 'race_2level.factor'), which(rownames.anova == 'route_of_infection_combined.factor'), which(rownames.anova == 'depo_ind.factor'), which(rownames.anova == 'prior_hcv_t0.factor')) mh_nade_depo_p <- c(anova(mh_nade.depo.mod[['msm2']])[order.for.anova,'P']) mh_nade_depo_p <- ifelse(mh_nade_depo_p < 0.001, '< 0.001', ifelse(mh_nade_depo_p >= 0.001 & mh_nade_depo_p < 0.01, format(round(mh_nade_depo_p,3),nsmall=2), ifelse(mh_nade_depo_p >= 0.01, format(round(mh_nade_depo_p,2),nsmall=2),''))) mh_nade_depo_p <- ifelse(is.na(mh_nade_depo_p),'', mh_nade_depo_p) #----------------------------------------------# mh_nade_only.depo.mod <- by_outcomes_with_depo(max_month = 'max_month_for_mh_nade_only_outcome', out = 'mh_nade_only_outcome_indicator', nade_var = 'prior_mh_non_ade_only.factor', time.df) mh_nade_only.depo.df <- subset(time.df, month <= max_month_for_mh_nade_only_outcome) dd <- datadist(mh_nade_only.depo.df) options(datadist = 'dd') j <- mh_nade_only.depo.mod[['msm2']] mh_nade_only.depo.summ <- summary(j, race_2level.factor = 'White', route_of_infection_combined = 'Non-IDU', prior_hcv_t0.factor = 'No',depo_ind.factor = 'No', year_of_t0 = c(2000, 2003),month = c(10,16), age_at_t0 = c(35,30),cum_depo = c(0,1),baseline_cd4_count_b = c(500,600), log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade_only.factor = 'No') mh_nade_only.depo.summ2 <- summary(j, race_2level.factor = 'White', route_of_infection_combined = 'Non-IDU', prior_hcv_t0.factor = 'No',depo_ind.factor = 'No', year_of_t0 = c(2000, 2003),month = c(10,16), age_at_t0 = c(35,40),cum_depo = c(0,1),baseline_cd4_count_b = c(500,600), log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade_only.factor = 'No') mh_nade_only.depo.summ3 <- summary(j, race_2level.factor = 'White', route_of_infection_combined = 'Non-IDU', prior_hcv_t0.factor = 'No',depo_ind.factor = 'No', year_of_t0 = c(2000, 2003),month = c(10,16), age_at_t0 = c(35,50),cum_depo = c(0,1),baseline_cd4_count_b = c(500,600), log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade_only.factor = 'No') order.for.summ <- c(which(rownames(mh_nade_only.depo.summ) == 'age_at_t0'), which(rownames(mh_nade_only.depo.summ) == 'month'), which(rownames(mh_nade_only.depo.summ) == 'cum_depo'), which(rownames(mh_nade_only.depo.summ) == 'year_of_t0'), which(rownames(mh_nade_only.depo.summ) == 'baseline_cd4_count_b'), which(rownames(mh_nade_only.depo.summ) == 'log10_baseline_vl'), which(rownames(mh_nade_only.depo.summ) == 'prior_hcp_t0'), which(rownames(mh_nade_only.depo.summ) == 'prior_mh_non_ade_only.factor - Yes:No'), which(rownames(mh_nade_only.depo.summ) == 'race_2level.factor - Non-white:White'), which(rownames(mh_nade_only.depo.summ) == 'route_of_infection_combined.factor - IDU:Non-IDU'), which(rownames(mh_nade_only.depo.summ) == 'depo_ind.factor - Yes:No'), which(rownames(mh_nade_only.depo.summ) == 'prior_hcv_t0.factor - Yes:No')) order.for.summ <- order.for.summ + 1 mh_nade_only_depo_or <- format(round(mh_nade_only.depo.summ[order.for.summ,'Effect'],2),nsmall=2) mh_nade_only_depo_or2 <- format(round(mh_nade_only.depo.summ2[2,'Effect'],2),nsmall=2) mh_nade_only_depo_or3 <- format(round(mh_nade_only.depo.summ3[2,'Effect'],2),nsmall=2) mh_nade_only_depo_ci <- paste('(',format(round(mh_nade_only.depo.summ[order.for.summ,'Lower 0.95'],2),nsmall=2), ', ',format(round(mh_nade_only.depo.summ[order.for.summ,'Upper 0.95'],2),nsmall=2),')',sep='') mh_nade_only_depo_ci2 <- paste('(', format(round(mh_nade_only.depo.summ2[2,'Lower 0.95'],2),nsmall=2), ', ',format(round(mh_nade_only.depo.summ2[2,'Upper 0.95'],2),nsmall=2), ')',sep='') mh_nade_only_depo_ci3 <- paste('(', format(round(mh_nade_only.depo.summ3[2,'Lower 0.95'],2),nsmall=2), ', ',format(round(mh_nade_only.depo.summ3[2,'Upper 0.95'],2),nsmall=2), ')',sep='') rownames.anova <- rownames(anova(mh_nade_only.depo.mod[['msm2']])) order.for.anova <- c(which(rownames.anova == 'age_at_t0'), which(rownames.anova == 'month'), which(rownames.anova == 'cum_depo'), which(rownames.anova == 'year_of_t0'), which(rownames.anova == 'baseline_cd4_count_b'), which(rownames.anova == 'log10_baseline_vl'), which(rownames.anova == 'prior_hcp_t0'), which(rownames.anova == 'prior_mh_non_ade_only.factor'), which(rownames.anova == 'race_2level.factor'), which(rownames.anova == 'route_of_infection_combined.factor'), which(rownames.anova == 'depo_ind.factor'), which(rownames.anova == 'prior_hcv_t0.factor')) mh_nade_only_depo_p <- c(anova(mh_nade_only.depo.mod[['msm2']])[order.for.anova,'P']) mh_nade_only_depo_p <- ifelse(mh_nade_only_depo_p < 0.001, '< 0.001', ifelse(mh_nade_only_depo_p >= 0.001 & mh_nade_only_depo_p < 0.01, format(round(mh_nade_only_depo_p,3),nsmall=2), ifelse(mh_nade_only_depo_p >= 0.01, format(round(mh_nade_only_depo_p,2),nsmall=2),''))) mh_nade_only_depo_p <- ifelse(is.na(mh_nade_only_depo_p),'', mh_nade_only_depo_p) mh_nade_no_sa.depo.mod <- by_outcomes_with_depo(max_month = 'max_month_for_mh_nade_no_sa_outcome', out = 'mh_nade_no_sa_outcome_indicator', nade_var = 'prior_mh_non_ade_no_sa.factor', time.df) mh_nade_no_sa.depo.df <- subset(time.df, month <= max_month_for_mh_nade_no_sa_outcome) dd <- datadist(mh_nade_no_sa.depo.df) options(datadist = 'dd') j <- mh_nade_no_sa.depo.mod[['msm2']] mh_nade_no_sa.depo.summ <- summary(j, race_2level.factor = 'White', route_of_infection_combined = 'Non-IDU', prior_hcv_t0.factor = 'No',depo_ind.factor = 'No', year_of_t0 = c(2000, 2003),month = c(10,16), age_at_t0 = c(35,30),cum_depo = c(0,1),baseline_cd4_count_b = c(500,600), log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade_no_sa.factor = 'No') mh_nade_no_sa.depo.summ2 <- summary(j, race_2level.factor = 'White', route_of_infection_combined = 'Non-IDU', prior_hcv_t0.factor = 'No',depo_ind.factor = 'No', year_of_t0 = c(2000, 2003),month = c(10,16), age_at_t0 = c(35,40),cum_depo = c(0,1),baseline_cd4_count_b = c(500,600), log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade_no_sa.factor = 'No') mh_nade_no_sa.depo.summ3 <- summary(j, race_2level.factor = 'White', route_of_infection_combined = 'Non-IDU', prior_hcv_t0.factor = 'No',depo_ind.factor = 'No', year_of_t0 = c(2000, 2003),month = c(10,16), age_at_t0 = c(35,50),cum_depo = c(0,1),baseline_cd4_count_b = c(500,600), log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade_no_sa.factor = 'No') order.for.summ <- c(which(rownames(mh_nade_no_sa.depo.summ) == 'age_at_t0'), which(rownames(mh_nade_no_sa.depo.summ) == 'month'), which(rownames(mh_nade_no_sa.depo.summ) == 'cum_depo'), which(rownames(mh_nade_no_sa.depo.summ) == 'year_of_t0'), which(rownames(mh_nade_no_sa.depo.summ) == 'baseline_cd4_count_b'), which(rownames(mh_nade_no_sa.depo.summ) == 'log10_baseline_vl'), which(rownames(mh_nade_no_sa.depo.summ) == 'prior_hcp_t0'), which(rownames(mh_nade_no_sa.depo.summ) == 'prior_mh_non_ade_no_sa.factor - Yes:No'), which(rownames(mh_nade_no_sa.depo.summ) == 'race_2level.factor - Non-white:White'), which(rownames(mh_nade_no_sa.depo.summ) == 'route_of_infection_combined.factor - IDU:Non-IDU'), which(rownames(mh_nade_no_sa.depo.summ) == 'depo_ind.factor - Yes:No'), which(rownames(mh_nade_no_sa.depo.summ) == 'prior_hcv_t0.factor - Yes:No')) order.for.summ <- order.for.summ + 1 mh_nade_no_sa_depo_or <- format(round(mh_nade_no_sa.depo.summ[order.for.summ,'Effect'],2),nsmall=2) mh_nade_no_sa_depo_or2 <- format(round(mh_nade_no_sa.depo.summ2[2,'Effect'],2),nsmall=2) mh_nade_no_sa_depo_or3 <- format(round(mh_nade_no_sa.depo.summ3[2,'Effect'],2),nsmall=2) mh_nade_no_sa_depo_ci <- paste('(',format(round(mh_nade_no_sa.depo.summ[order.for.summ,'Lower 0.95'],2),nsmall=2), ', ',format(round(mh_nade_no_sa.depo.summ[order.for.summ,'Upper 0.95'],2),nsmall=2),')',sep='') mh_nade_no_sa_depo_ci2 <- paste('(', format(round(mh_nade_no_sa.depo.summ2[2,'Lower 0.95'],2),nsmall=2), ', ',format(round(mh_nade_no_sa.depo.summ2[2,'Upper 0.95'],2),nsmall=2), ')',sep='') mh_nade_no_sa_depo_ci3 <- paste('(', format(round(mh_nade_no_sa.depo.summ3[2,'Lower 0.95'],2),nsmall=2), ', ',format(round(mh_nade_no_sa.depo.summ3[2,'Upper 0.95'],2),nsmall=2), ')',sep='') rownames.anova <- rownames(anova(mh_nade_no_sa.depo.mod[['msm2']])) order.for.anova <- c(which(rownames.anova == 'age_at_t0'), which(rownames.anova == 'month'), which(rownames.anova == 'cum_depo'), which(rownames.anova == 'year_of_t0'), which(rownames.anova == 'baseline_cd4_count_b'), which(rownames.anova == 'log10_baseline_vl'), which(rownames.anova == 'prior_hcp_t0'), which(rownames.anova == 'prior_mh_non_ade_no_sa.factor'), which(rownames.anova == 'race_2level.factor'), which(rownames.anova == 'route_of_infection_combined.factor'), which(rownames.anova == 'depo_ind.factor'), which(rownames.anova == 'prior_hcv_t0.factor')) mh_nade_no_sa_depo_p <- c(anova(mh_nade_no_sa.depo.mod[['msm2']])[order.for.anova,'P']) mh_nade_no_sa_depo_p <- ifelse(mh_nade_no_sa_depo_p < 0.001, '< 0.001', ifelse(mh_nade_no_sa_depo_p >= 0.001 & mh_nade_no_sa_depo_p < 0.01, format(round(mh_nade_no_sa_depo_p,3),nsmall=2), ifelse(mh_nade_no_sa_depo_p >= 0.01, format(round(mh_nade_no_sa_depo_p,2),nsmall=2),''))) mh_nade_no_sa_depo_p <- ifelse(is.na(mh_nade_no_sa_depo_p),'', mh_nade_no_sa_depo_p) #----------------------------------------------# nade_dth_depo <- by_outcomes_with_depo(max_month = 'max_month_for_nade_dth_outcome', out = 'nade_dth_outcome_indicator', nade_var = 'prior_non_ade.factor', time.df) nade_dth.depo.df <- subset(time.df, month <= max_month_for_nade_dth_outcome) dd <- datadist(nade_dth.depo.df) options(datadist = 'dd') j <- nade_dth_depo[['msm2']] nade_dth.depo.summ <- summary(j, race_2level.factor = 'White', route_of_infection_combined = 'Non-IDU', prior_hcv_t0.factor = 'No',depo_ind.factor = 'No', year_of_t0 = c(2000, 2003),month=c(10,16), age_at_t0 = c(35,30),cum_depo = c(0,1),baseline_cd4_count_b = c(500,600), log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_non_ade.factor = 'No') nade_dth.depo.summ2 <- summary(j, race_2level.factor = 'White', route_of_infection_combined = 'Non-IDU', prior_hcv_t0.factor = 'No',depo_ind.factor = 'No', year_of_t0 = c(2000, 2003),month=c(10,16), age_at_t0 = c(35,40),cum_depo = c(0,1),baseline_cd4_count_b = c(500,600), log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_non_ade.factor = 'No') nade_dth.depo.summ3 <- summary(j, race_2level.factor = 'White', route_of_infection_combined = 'Non-IDU', prior_hcv_t0.factor = 'No',depo_ind.factor = 'No', year_of_t0 = c(2000, 2003),month=c(10,16), age_at_t0 = c(35,50),cum_depo = c(0,1),baseline_cd4_count_b = c(500,600), log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_non_ade.factor = 'No') order.for.summ <- c(which(rownames(nade_dth.depo.summ) == 'age_at_t0'), which(rownames(nade_dth.depo.summ) == 'month'), which(rownames(nade_dth.depo.summ) == 'cum_depo'), which(rownames(nade_dth.depo.summ) == 'year_of_t0'), which(rownames(nade_dth.depo.summ) == 'baseline_cd4_count_b'), which(rownames(nade_dth.depo.summ) == 'log10_baseline_vl'), which(rownames(nade_dth.depo.summ) == 'prior_hcp_t0'), which(rownames(nade_dth.depo.summ) == 'prior_non_ade.factor - Yes:No'), which(rownames(nade_dth.depo.summ) == 'race_2level.factor - Non-white:White'), which(rownames(nade_dth.depo.summ) == 'route_of_infection_combined.factor - IDU:Non-IDU'), which(rownames(nade_dth.depo.summ) == 'depo_ind.factor - Yes:No'), which(rownames(nade_dth.depo.summ) == 'prior_hcv_t0.factor - Yes:No')) order.for.summ <- order.for.summ + 1 nade_dth_depo_or <- format(round(nade_dth.depo.summ[order.for.summ,'Effect'],2),nsmall=2) # nade_dth_depo_or <- format(round(nade_dth.depo.summ[which(rownames(nade_dth.depo.summ) == ' Odds Ratio'),'Effect'],2),nsmall=2) nade_dth_depo_or2 <- format(round(nade_dth.depo.summ2[2,'Effect'],2),nsmall=2) nade_dth_depo_or3 <- format(round(nade_dth.depo.summ3[2,'Effect'],2),nsmall=2) nade_dth_depo_ci <- paste('(',format(round(nade_dth.depo.summ[order.for.summ,'Lower 0.95'],2),nsmall=2), ', ',format(round(nade_dth.depo.summ[order.for.summ,'Upper 0.95'],2),nsmall=2),')',sep='') nade_dth_depo_ci2 <- paste('(', format(round(nade_dth.depo.summ2[2,'Lower 0.95'],2),nsmall=2), ', ',format(round(nade_dth.depo.summ2[2,'Upper 0.95'],2),nsmall=2), ')',sep='') nade_dth_depo_ci3 <- paste('(', format(round(nade_dth.depo.summ3[2,'Lower 0.95'],2),nsmall=2), ', ',format(round(nade_dth.depo.summ3[2,'Upper 0.95'],2),nsmall=2), ')',sep='') rownames.anova <- rownames(anova(nade_dth_depo[['msm2']])) order.for.anova <- c(which(rownames.anova == 'age_at_t0'), which(rownames.anova == 'month'), which(rownames.anova == 'cum_depo'), which(rownames.anova == 'year_of_t0'), which(rownames.anova == 'baseline_cd4_count_b'), which(rownames.anova == 'log10_baseline_vl'), which(rownames.anova == 'prior_hcp_t0'), which(rownames.anova == 'prior_non_ade.factor'), which(rownames.anova == 'race_2level.factor'), which(rownames.anova == 'route_of_infection_combined.factor'), which(rownames.anova == 'depo_ind.factor'), which(rownames.anova == 'prior_hcv_t0.factor')) nade_dth_depo_p <- c(anova(nade_dth_depo[['msm2']])[order.for.anova,'P']) nade_dth_depo_p <- ifelse(nade_dth_depo_p < 0.001, '< 0.001', ifelse(nade_dth_depo_p >= 0.001 & nade_dth_depo_p < 0.01, format(round(nade_dth_depo_p,3),nsmall=2), ifelse(nade_dth_depo_p >= 0.01, format(round(nade_dth_depo_p,2),nsmall=2),''))) nade_dth_depo_p <- ifelse(is.na(nade_dth_depo_p),'', nade_dth_depo_p) mh_nade_dth_depo <- by_outcomes_with_depo(max_month = 'max_month_for_mh_nade_dth_outcome', out = 'mh_nade_dth_outcome_indicator', nade_var = 'prior_mh_non_ade.factor', time.df) mh_nade_dth.depo.df <- subset(time.df, month <= max_month_for_mh_nade_dth_outcome) dd <- datadist(mh_nade_dth.depo.df) options(datadist = 'dd') j <- mh_nade_dth_depo[['msm2']] mh_nade_dth.depo.summ <- summary(j, race_2level.factor = 'White', route_of_infection_combined = 'Non-IDU', prior_hcv_t0.factor = 'No',depo_ind.factor = 'No', year_of_t0 = c(2000, 2003),month=c(10,16), age_at_t0 = c(35,30),cum_depo = c(0,1),baseline_cd4_count_b = c(500,600), log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade.factor = 'No') mh_nade_dth.depo.summ2 <- summary(j, race_2level.factor = 'White', route_of_infection_combined = 'Non-IDU', prior_hcv_t0.factor = 'No',depo_ind.factor = 'No', year_of_t0 = c(2000, 2003),month=c(10,16), age_at_t0 = c(35,40),cum_depo = c(0,1),baseline_cd4_count_b = c(500,600), log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade.factor = 'No') mh_nade_dth.depo.summ3 <- summary(j, race_2level.factor = 'White', route_of_infection_combined = 'Non-IDU', prior_hcv_t0.factor = 'No',depo_ind.factor = 'No', year_of_t0 = c(2000, 2003),month=c(10,16), age_at_t0 = c(35,50),cum_depo = c(0,1),baseline_cd4_count_b = c(500,600), log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade.factor = 'No') order.for.summ <- c(which(rownames(mh_nade_dth.depo.summ) == 'age_at_t0'), which(rownames(mh_nade_dth.depo.summ) == 'month'), which(rownames(mh_nade_dth.depo.summ) == 'cum_depo'), which(rownames(mh_nade_dth.depo.summ) == 'year_of_t0'), which(rownames(mh_nade_dth.depo.summ) == 'baseline_cd4_count_b'), which(rownames(mh_nade_dth.depo.summ) == 'log10_baseline_vl'), which(rownames(mh_nade_dth.depo.summ) == 'prior_hcp_t0'), which(rownames(mh_nade_dth.depo.summ) == 'prior_mh_non_ade.factor - Yes:No'), which(rownames(mh_nade_dth.depo.summ) == 'race_2level.factor - Non-white:White'), which(rownames(mh_nade_dth.depo.summ) == 'route_of_infection_combined.factor - IDU:Non-IDU'), which(rownames(mh_nade_dth.depo.summ) == 'depo_ind.factor - Yes:No'), which(rownames(mh_nade_dth.depo.summ) == 'prior_hcv_t0.factor - Yes:No')) order.for.summ <- order.for.summ + 1 mh_nade_dth_depo_or <- format(round(mh_nade_dth.depo.summ[order.for.summ,'Effect'],2),nsmall=2) mh_nade_dth_depo_or2 <- format(round(mh_nade_dth.depo.summ2[2,'Effect'],2),nsmall=2) mh_nade_dth_depo_or3 <- format(round(mh_nade_dth.depo.summ3[2,'Effect'],2),nsmall=2) mh_nade_dth_depo_ci <- paste('(',format(round(mh_nade_dth.depo.summ[order.for.summ,'Lower 0.95'],2),nsmall=2), ', ',format(round(mh_nade_dth.depo.summ[order.for.summ,'Upper 0.95'],2),nsmall=2),')',sep='') mh_nade_dth_depo_ci2 <- paste('(', format(round(mh_nade_dth.depo.summ2[2,'Lower 0.95'],2),nsmall=2), ', ',format(round(mh_nade_dth.depo.summ2[2,'Upper 0.95'],2),nsmall=2), ')',sep='') mh_nade_dth_depo_ci3 <- paste('(', format(round(mh_nade_dth.depo.summ3[2,'Lower 0.95'],2),nsmall=2), ', ',format(round(mh_nade_dth.depo.summ3[2,'Upper 0.95'],2),nsmall=2), ')',sep='') rownames.anova <- rownames(anova(mh_nade_dth_depo[['msm2']])) order.for.anova <- c(which(rownames.anova == 'age_at_t0'), which(rownames.anova == 'month'), which(rownames.anova == 'cum_depo'), which(rownames.anova == 'year_of_t0'), which(rownames.anova == 'baseline_cd4_count_b'), which(rownames.anova == 'log10_baseline_vl'), which(rownames.anova == 'prior_hcp_t0'), which(rownames.anova == 'prior_mh_non_ade.factor'), which(rownames.anova == 'race_2level.factor'), which(rownames.anova == 'route_of_infection_combined.factor'), which(rownames.anova == 'depo_ind.factor'), which(rownames.anova == 'prior_hcv_t0.factor')) mh_nade_dth_depo_p <- c(anova(mh_nade_dth_depo[['msm2']])[order.for.anova,'P']) mh_nade_dth_depo_p <- ifelse(mh_nade_dth_depo_p < 0.001, '< 0.001', ifelse(mh_nade_dth_depo_p >= 0.001 & mh_nade_dth_depo_p < 0.01, format(round(mh_nade_dth_depo_p,3),nsmall=2), ifelse(mh_nade_dth_depo_p >= 0.01, format(round(mh_nade_dth_depo_p,2),nsmall=2),''))) mh_nade_dth_depo_p <- ifelse(is.na(mh_nade_dth_depo_p),'', mh_nade_dth_depo_p) #------------------------------------------# mh_nade_only_dth_depo <- by_outcomes_with_depo(max_month = 'max_month_for_mh_nade_only_dth_outcome', out = 'mh_nade_only_dth_outcome_indicator', nade_var = 'prior_mh_non_ade_only.factor', time.df) mh_nade_only_dth.depo.df <- subset(time.df, month <= max_month_for_mh_nade_only_dth_outcome) dd <- datadist(mh_nade_only_dth.depo.df) options(datadist = 'dd') j <- mh_nade_only_dth_depo[['msm2']] mh_nade_only_dth.depo.summ <- summary(j, race_2level.factor = 'White', route_of_infection_combined = 'Non-IDU', prior_hcv_t0.factor = 'No',depo_ind.factor = 'No', year_of_t0 = c(2000, 2003),month=c(10,16), age_at_t0 = c(35,30),cum_depo = c(0,1),baseline_cd4_count_b = c(500,600), log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade_only.factor = 'No') mh_nade_only_dth.depo.summ2 <- summary(j, race_2level.factor = 'White', route_of_infection_combined = 'Non-IDU', prior_hcv_t0.factor = 'No',depo_ind.factor = 'No', year_of_t0 = c(2000, 2003),month=c(10,16), age_at_t0 = c(35,40),cum_depo = c(0,1),baseline_cd4_count_b = c(500,600), log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade_only.factor = 'No') mh_nade_only_dth.depo.summ3 <- summary(j, race_2level.factor = 'White', route_of_infection_combined = 'Non-IDU', prior_hcv_t0.factor = 'No',depo_ind.factor = 'No', year_of_t0 = c(2000, 2003),month=c(10,16), age_at_t0 = c(35,50),cum_depo = c(0,1),baseline_cd4_count_b = c(500,600), log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade_only.factor = 'No') order.for.summ <- c(which(rownames(mh_nade_only_dth.depo.summ) == 'age_at_t0'), which(rownames(mh_nade_only_dth.depo.summ) == 'month'), which(rownames(mh_nade_only_dth.depo.summ) == 'cum_depo'), which(rownames(mh_nade_only_dth.depo.summ) == 'year_of_t0'), which(rownames(mh_nade_only_dth.depo.summ) == 'baseline_cd4_count_b'), which(rownames(mh_nade_only_dth.depo.summ) == 'log10_baseline_vl'), which(rownames(mh_nade_only_dth.depo.summ) == 'prior_hcp_t0'), which(rownames(mh_nade_only_dth.depo.summ) == 'prior_mh_non_ade_only.factor - Yes:No'), which(rownames(mh_nade_only_dth.depo.summ) == 'race_2level.factor - Non-white:White'), which(rownames(mh_nade_only_dth.depo.summ) == 'route_of_infection_combined.factor - IDU:Non-IDU'), which(rownames(mh_nade_only_dth.depo.summ) == 'depo_ind.factor - Yes:No'), which(rownames(mh_nade_only_dth.depo.summ) == 'prior_hcv_t0.factor - Yes:No')) order.for.summ <- order.for.summ + 1 mh_nade_only_dth_depo_or <- format(round(mh_nade_only_dth.depo.summ[order.for.summ,'Effect'],2),nsmall=2) mh_nade_only_dth_depo_or2 <- format(round(mh_nade_only_dth.depo.summ2[2,'Effect'],2),nsmall=2) mh_nade_only_dth_depo_or3 <- format(round(mh_nade_only_dth.depo.summ3[2,'Effect'],2),nsmall=2) mh_nade_only_dth_depo_ci <- paste('(',format(round(mh_nade_only_dth.depo.summ[order.for.summ,'Lower 0.95'],2),nsmall=2), ', ',format(round(mh_nade_only_dth.depo.summ[order.for.summ,'Upper 0.95'],2),nsmall=2),')',sep='') mh_nade_only_dth_depo_ci2 <- paste('(', format(round(mh_nade_only_dth.depo.summ2[2,'Lower 0.95'],2),nsmall=2), ', ',format(round(mh_nade_only_dth.depo.summ2[2,'Upper 0.95'],2),nsmall=2),')',sep='') mh_nade_only_dth_depo_ci3 <- paste('(', format(round(mh_nade_only_dth.depo.summ3[2,'Lower 0.95'],2),nsmall=2), ', ',format(round(mh_nade_only_dth.depo.summ3[2,'Upper 0.95'],2),nsmall=2),')',sep='') rownames.anova <- rownames(anova(mh_nade_only_dth_depo[['msm2']])) order.for.anova <- c(which(rownames.anova == 'age_at_t0'), which(rownames.anova == 'month'), which(rownames.anova == 'cum_depo'), which(rownames.anova == 'year_of_t0'), which(rownames.anova == 'baseline_cd4_count_b'), which(rownames.anova == 'log10_baseline_vl'), which(rownames.anova == 'prior_hcp_t0'), which(rownames.anova == 'prior_mh_non_ade_only.factor'), which(rownames.anova == 'race_2level.factor'), which(rownames.anova == 'route_of_infection_combined.factor'), which(rownames.anova == 'depo_ind.factor'), which(rownames.anova == 'prior_hcv_t0.factor')) mh_nade_only_dth_depo_p <- c(anova(mh_nade_only_dth_depo[['msm2']])[order.for.anova,'P']) mh_nade_only_dth_depo_p <- ifelse(mh_nade_only_dth_depo_p < 0.001, '< 0.001', ifelse(mh_nade_only_dth_depo_p >= 0.001 & mh_nade_only_dth_depo_p < 0.01,format(round(mh_nade_only_dth_depo_p,3),nsmall=2), ifelse(mh_nade_only_dth_depo_p >= 0.01, format(round(mh_nade_only_dth_depo_p,2),nsmall=2),''))) mh_nade_only_dth_depo_p <- ifelse(is.na(mh_nade_only_dth_depo_p),'', mh_nade_only_dth_depo_p) mh_nade_no_sa_dth_depo <- by_outcomes_with_depo(max_month = 'max_month_for_mh_nade_no_sa_dth_outcome', out = 'mh_nade_no_sa_dth_outcome_indicator', nade_var = 'prior_mh_non_ade_no_sa.factor', time.df) mh_nade_no_sa_dth.depo.df <- subset(time.df, month <= max_month_for_mh_nade_no_sa_dth_outcome) dd <- datadist(mh_nade_no_sa_dth.depo.df) options(datadist = 'dd') j <- mh_nade_no_sa_dth_depo[['msm2']] mh_nade_no_sa_dth.depo.summ <- summary(j, race_2level.factor = 'White', route_of_infection_combined = 'Non-IDU', prior_hcv_t0.factor = 'No',depo_ind.factor = 'No', year_of_t0 = c(2000, 2003),month=c(10,16), age_at_t0 = c(35,30),cum_depo = c(0,1),baseline_cd4_count_b = c(500,600), log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade_no_sa.factor = 'No') mh_nade_no_sa_dth.depo.summ2 <- summary(j, race_2level.factor = 'White', route_of_infection_combined = 'Non-IDU', prior_hcv_t0.factor = 'No',depo_ind.factor = 'No', year_of_t0 = c(2000, 2003),month=c(10,16), age_at_t0 = c(35,40),cum_depo = c(0,1),baseline_cd4_count_b = c(500,600), log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade_no_sa.factor = 'No') mh_nade_no_sa_dth.depo.summ3 <- summary(j, race_2level.factor = 'White', route_of_infection_combined = 'Non-IDU', prior_hcv_t0.factor = 'No',depo_ind.factor = 'No', year_of_t0 = c(2000, 2003),month=c(10,16), age_at_t0 = c(35,50),cum_depo = c(0,1),baseline_cd4_count_b = c(500,600), log10_baseline_vl = c(3,4),prior_hcp_t0 = 0,prior_mh_non_ade_no_sa.factor = 'No') order.for.summ <- c(which(rownames(mh_nade_no_sa_dth.depo.summ) == 'age_at_t0'), which(rownames(mh_nade_no_sa_dth.depo.summ) == 'month'), which(rownames(mh_nade_no_sa_dth.depo.summ) == 'cum_depo'), which(rownames(mh_nade_no_sa_dth.depo.summ) == 'year_of_t0'), which(rownames(mh_nade_no_sa_dth.depo.summ) == 'baseline_cd4_count_b'), which(rownames(mh_nade_no_sa_dth.depo.summ) == 'log10_baseline_vl'), which(rownames(mh_nade_no_sa_dth.depo.summ) == 'prior_hcp_t0'), which(rownames(mh_nade_no_sa_dth.depo.summ) == 'prior_mh_non_ade_no_sa.factor - Yes:No'), which(rownames(mh_nade_no_sa_dth.depo.summ) == 'race_2level.factor - Non-white:White'), which(rownames(mh_nade_no_sa_dth.depo.summ) == 'route_of_infection_combined.factor - IDU:Non-IDU'), which(rownames(mh_nade_no_sa_dth.depo.summ) == 'depo_ind.factor - Yes:No'), which(rownames(mh_nade_no_sa_dth.depo.summ) == 'prior_hcv_t0.factor - Yes:No')) order.for.summ <- order.for.summ + 1 mh_nade_no_sa_dth_depo_or <- format(round(mh_nade_no_sa_dth.depo.summ[order.for.summ,'Effect'],2),nsmall=2) mh_nade_no_sa_dth_depo_or2 <- format(round(mh_nade_no_sa_dth.depo.summ2[2,'Effect'],2),nsmall=2) mh_nade_no_sa_dth_depo_or3 <- format(round(mh_nade_no_sa_dth.depo.summ3[2,'Effect'],2),nsmall=2) mh_nade_no_sa_dth_depo_ci <- paste('(',format(round(mh_nade_no_sa_dth.depo.summ[order.for.summ,'Lower 0.95'],2),nsmall=2), ', ',format(round(mh_nade_no_sa_dth.depo.summ[order.for.summ,'Upper 0.95'],2),nsmall=2),')',sep='') mh_nade_no_sa_dth_depo_ci2 <- paste('(', format(round(mh_nade_no_sa_dth.depo.summ2[2,'Lower 0.95'],2),nsmall=2), ', ',format(round(mh_nade_no_sa_dth.depo.summ2[2,'Upper 0.95'],2),nsmall=2),')',sep='') mh_nade_no_sa_dth_depo_ci3 <- paste('(', format(round(mh_nade_no_sa_dth.depo.summ3[2,'Lower 0.95'],2),nsmall=2), ', ',format(round(mh_nade_no_sa_dth.depo.summ3[2,'Upper 0.95'],2),nsmall=2),')',sep='') rownames.anova <- rownames(anova(mh_nade_no_sa_dth_depo[['msm2']])) order.for.anova <- c(which(rownames.anova == 'age_at_t0'), which(rownames.anova == 'month'), which(rownames.anova == 'cum_depo'), which(rownames.anova == 'year_of_t0'), which(rownames.anova == 'baseline_cd4_count_b'), which(rownames.anova == 'log10_baseline_vl'), which(rownames.anova == 'prior_hcp_t0'), which(rownames.anova == 'prior_mh_non_ade_no_sa.factor'), which(rownames.anova == 'race_2level.factor'), which(rownames.anova == 'route_of_infection_combined.factor'), which(rownames.anova == 'depo_ind.factor'), which(rownames.anova == 'prior_hcv_t0.factor')) mh_nade_no_sa_dth_depo_p <- c(anova(mh_nade_no_sa_dth_depo[['msm2']])[order.for.anova,'P']) mh_nade_no_sa_dth_depo_p <- ifelse(mh_nade_no_sa_dth_depo_p < 0.001, '< 0.001', ifelse(mh_nade_no_sa_dth_depo_p >= 0.001 & mh_nade_no_sa_dth_depo_p < 0.01,format(round(mh_nade_no_sa_dth_depo_p,3),nsmall=2), ifelse(mh_nade_no_sa_dth_depo_p >= 0.01, format(round(mh_nade_no_sa_dth_depo_p,2),nsmall=2),''))) mh_nade_no_sa_dth_depo_p <- ifelse(is.na(mh_nade_no_sa_dth_depo_p),'', mh_nade_no_sa_dth_depo_p) # Just need a simple model with death as the outcome df <- subset(time.df, month <= max_month_for_death_outcome) df <- time.df[which(time.df$month <= time.df[['max_month_for_death_outcome']]),] wt.mod.denom <- glm(depo_ind.factor ~ age_at_t0 + rcs(month,4) + race_2level.factor + rcs(cd4_count_b,3) + (1-undetectable_vl)*log10_hiv1_rna_b + haart_ind + reg_indicator + route_of_infection_combined.factor + prior_hcv_t0.factor + year_of_t0 + ade_indicator + prior_hcp_t0 + prior_non_ade.factor, data=df, subset=pregnancy_indicator == 0, family='binomial') denom <- exp(predict(wt.mod.denom))/(1+exp(predict(wt.mod.denom))) #plus time-invariant covariates from above wt.mod.num <- glm(hcp_ind.factor ~ rcs(month,4), data=df, subset=pregnancy_indicator == 0 & !is.na(cd4_count_b) & !is.na(log10_hiv1_rna_b),family='binomial') num <- exp(predict(wt.mod.num))/(1+exp(predict(wt.mod.num))) wt <- ifelse(df$hcp_ind[which(df$pregnancy_indicator == 0 & !is.na(df$cd4_count_b) & !is.na(df$log10_hiv1_rna_b))] == 1, num/denom, (1-num)/(1-denom)) quant.wt <- quantile(wt, probs=c(0.025,0.975)) wt <- ifelse(wt < quant.wt[1],quant.wt[1], ifelse(wt > quant.wt[2],quant.wt[2],wt)) outcome <- df$death_outcome_indicator[which(df$pregnancy_indicator == 0 & !is.na(df$cd4_count_b) & !is.na(df$log10_hiv1_rna_b))] depo_ind.factor <- df$depo_ind.factor[which(df$pregnancy_indicator == 0 & !is.na(df$cd4_count_b) & !is.na(df$log10_hiv1_rna_b))] cum_depo <- df$cum_depo[which(df$pregnancy_indicator == 0 & !is.na(df$cd4_count_b) & !is.na(df$log10_hiv1_rna_b))] pregnancy_indicator <- df$pregnancy_indicator[which(df$pregnancy_indicator == 0 & !is.na(df$cd4_count_b) & !is.na(df$log10_hiv1_rna_b))] msm.depo <- glm(outcome ~ depo_ind.factor + cum_depo, weights=wt, family='quasibinomial',subset=pregnancy_indicator == 0) outcome2 <- factor(outcome) dd <<- datadist(df) options(datadist='dd') msm2.depo <- lrm(outcome2 ~ depo_ind.factor + cum_depo,weights=wt, subset=pregnancy_indicator == 0,tol=-10) death.simple.depo.summ <- summary(msm2.depo, depo_ind.factor = 'No',cum_depo = c(0,1)) death_simple_depo_or <- format(round(death.simple.depo.summ[which(rownames(death.simple.depo.summ) == ' Odds Ratio'),'Effect'],2),nsmall=2) death_simple_depo_ci <- paste('(', format(round(death.simple.depo.summ[which(rownames(death.simple.depo.summ) == ' Odds Ratio'), 'Lower 0.95'],2),nsmall=2), ', ',format(round(death.simple.depo.summ[which(rownames(death.simple.depo.summ) == ' Odds Ratio'), 'Upper 0.95'],2),nsmall=2),')',sep='') rownames.anova <- rownames(anova(msm2.depo)) order.for.anova <- c(which(rownames.anova == 'cum_depo'), which(rownames.anova == 'depo_ind.factor')) death_simple_depo_p <- c(anova(msm2.depo)[order.for.anova[1:2],'P']) death_simple_depo_p <- ifelse(death_simple_depo_p < 0.001, '< 0.001', ifelse(death_simple_depo_p >= 0.001 & death_simple_depo_p < 0.01, round(death_simple_depo_p,3), ifelse(death_simple_depo_p >= 0.01, round(death_simple_depo_p,2),''))) death_simple_depo_p <- ifelse(is.na(death_simple_depo_p),'', death_simple_depo_p) # Frequency table of NADEs mh_nade_long$last_age <- time.df[match(mh_nade_long$cfar_pid,time.df$cfar_pid,nomatch=NA),'last_age'] sub_mh_nade_long <- subset(mh_nade_long, age_mh_nade >= age_at_t0 & age_mh_nade <= last_age) params <- grep('age_at_mh',names(mh_nade),value=TRUE) mh_nade_long2 <- na.omit(reshape(mh_nade,params,v.names='age',timevar='event',idvar='cfar_pid',direction='long',sep=',', drop=setdiff(names(mh_nade),c('cfar_pid',params)))) params <- grep('mh_non_ade_after',names(mh_nade),value=TRUE) mh_nade$mh_non_ade_after_t01 <- ifelse(mh_nade$mh_non_ade_after_t01 == '',NA,mh_nade$mh_non_ade_after_t01) mh_nade$mh_non_ade_after_t02 <- ifelse(mh_nade$mh_non_ade_after_t02 == '',NA,mh_nade$mh_non_ade_after_t02) mh_nade$mh_non_ade_after_t03 <- ifelse(mh_nade$mh_non_ade_after_t03 == '',NA,mh_nade$mh_non_ade_after_t03) mh_nade$mh_non_ade_after_t04 <- ifelse(mh_nade$mh_non_ade_after_t04 == '',NA,mh_nade$mh_non_ade_after_t04) mh_nade$mh_non_ade_after_t05 <- ifelse(mh_nade$mh_non_ade_after_t05 == '',NA,mh_nade$mh_non_ade_after_t05) mh_nade$mh_non_ade_after_t06 <- ifelse(mh_nade$mh_non_ade_after_t06 == '',NA,mh_nade$mh_non_ade_after_t06) nade_event <- na.omit(reshape(mh_nade,params,v.names='nade',timevar='event',idvar='cfar_pid',direction='long',sep=',', drop=setdiff(names(mh_nade),c('cfar_pid',params)))) mh_nade_long2$pid.event <- paste(mh_nade_long2$cfar_pid,mh_nade_long2$event,sep='.') nade_event$pid.event <- paste(nade_event$cfar_pid,nade_event$event,sep='.') # Two NADEs listed without ages so must remove them nade_event <- nade_event[which(nade_event$pid.event %in% intersect(mh_nade_long2$pid.event,nade_event$pid.event)),] mh_nade_long2$nade <- nade_event[match(mh_nade_long2$pid.event,nade_event$pid.event,nomatch=NA),'nade'] mh_nade$mh_non_ade_category1 <- ifelse(mh_nade$mh_non_ade_category1 == '',NA,mh_nade$mh_non_ade_category1) mh_nade$mh_non_ade_category2 <- ifelse(mh_nade$mh_non_ade_category2 == '',NA,mh_nade$mh_non_ade_category2) mh_nade$mh_non_ade_category3 <- ifelse(mh_nade$mh_non_ade_category3 == '',NA,mh_nade$mh_non_ade_category3) mh_nade$mh_non_ade_category4 <- ifelse(mh_nade$mh_non_ade_category4 == '',NA,mh_nade$mh_non_ade_category4) mh_nade$mh_non_ade_category5 <- ifelse(mh_nade$mh_non_ade_category5 == '',NA,mh_nade$mh_non_ade_category5) mh_nade$mh_non_ade_category6 <- ifelse(mh_nade$mh_non_ade_category6 == '',NA,mh_nade$mh_non_ade_category6) params <- grep('mh_non_ade_category',names(mh_nade),value=TRUE) nade_category <- na.omit(reshape(mh_nade,params,v.names='nade_category',timevar='event',idvar='cfar_pid',direction='long',sep=',', drop=setdiff(names(mh_nade),c('cfar_pid',params)))) nade_category$pid.event <- paste(nade_category$cfar_pid,nade_category$event,sep='.') nade_category <- nade_category[which(nade_category$pid.event %in% intersect(mh_nade_long2$pid.event,nade_category$pid.event)),] mh_nade_long2$nade_category <- nade_category[match(mh_nade_long2$pid.event,nade_category$pid.event,nomatch=NA),'nade_category'] mh_nade_long2 <- subset(mh_nade_long2, nade_category != 'SUBSTANCE ABUSE') mh_nade_long2$age_at_t0 <- mh_nade[match(mh_nade_long2$cfar_pid,mh_nade$cfar_pid,nomatch=NA),'age_at_t0'] mh_nade_long2$age_after_year_gap <- mh_nade[match(mh_nade_long2$cfar_pid,mh_nade$cfar_pid,nomatch=NA),'age_after_year_gap'] mh_nade_long2$last_age <- time.df[match(mh_nade_long2$cfar_pid, time.df$cfar_pid, nomatch=NA),'last_age'] mh_nade_long2 <- subset(mh_nade_long2, age >= age_at_t0 & age <= last_age & cfar_pid %in% unique(time.df$cfar_pid)) mh_nade_long2$max_number_months_followed <- time.df[match(mh_nade_long2$cfar_pid, time.df$cfar_pid, nomatch=NA),'max_number_months_followed'] mh_nade_long2$max_number_years_followed <- mh_nade_long2$max_number_months_followed/12 mh_nade_long2$month <- with(mh_nade_long2, ceiling((age - age_at_t0)*12)) mh_nade_long2$pid.month <- paste(mh_nade_long2$cfar_pid, mh_nade_long2$month, sep='.') time.df$nade_name <- mh_nade_long2[match(time.df$pid.month,mh_nade_long2$pid.month,nomatch=NA),'nade'] time.df$nade_category <- mh_nade_long2[match(time.df$pid.month,mh_nade_long2$pid.month,nomatch=NA),'nade_category'] dup.ids.month <- mh_nade_long2[duplicated(mh_nade_long2$pid.month),'pid.month'] mh_nade_long2$duplicated_pid.month <- ifelse(mh_nade_long2$pid.month %in% dup.ids.month,1,0) dup.records <- mh_nade_long2[duplicated(mh_nade_long2$pid.month),] time.df$month_of_first_nade <- with(time.df, ceiling((age_of_first_nade - age_at_t0)*12)) time.df$month_of_first_mh_nade_only <- with(time.df, ceiling((age_of_first_mh_nade_only - age_at_t0)*12)) time.df$month_of_first_mh_nade_no_sa <- with(time.df, ceiling((age_of_first_mh_nade_no_sa - age_at_t0)*12)) time.df$duplicated_pid.month <- mh_nade_long2[match(time.df$pid.month,mh_nade_long2$pid.month,nomatch=NA),'duplicated_pid.month'] time.df$nade_category2 <- dup.records[match(time.df$pid.month,dup.records$pid.month,nomatch=NA),'nade_category'] time.df$nade_name2 <- dup.records[match(time.df$pid.month,dup.records$pid.month,nomatch=NA),'nade'] time.df$nade_category_new <- ifelse(time.df$nade_category %in% c('CARDIOVASCULAR','CEREBROVASCULAR'),'CARDIOVASCULAR OR CEREBROVASCULAR', time.df$nade_category) time.df$nade_category2_new <- ifelse(time.df$nade_category2 %in% c('CARDIOVASCULAR','CEREBROVASCULAR'),'CARDIOVASCULAR OR CEREBROVASCULAR', time.df$nade_category2) incidence_function <- function(df,nade_of_interest){ # Event data evt_df <- subset(df, nade_category_new %in% nade_of_interest | nade_category2_new %in% nade_of_interest) evt_df <- evt_df[order(evt_df$cfar_pid,evt_df$age),] evt_df$nade_category_new <- ifelse(evt_df$nade_category_new %nin% nade_of_interest,NA,evt_df$nade_category_new) evt_df$nade_category2_new <- ifelse(evt_df$nade_category2_new %nin% nade_of_interest,NA,evt_df$nade_category2_new) # Counting ALL events, not just the first one tot_evts_hc <- length(which(evt_df$hcp_ind == 1)) tot_evts_no_hc <- length(which(evt_df$hcp_ind == 0)) evt_df <- evt_df[!duplicated(evt_df$cfar_pid),] evt_df$cfar_pid <- factor(evt_df$cfar_pid) # Data with no event of interest no_evt_df <- subset(df, cfar_pid %nin% evt_df$cfar_pid) no_evt_df$cfar_pid <- factor(no_evt_df$cfar_pid) # ----- Find the max cumulative time not on HCP ---- # # In the event data evt_df$max_cum_no_hcp <- NA split(evt_df$max_cum_no_hcp, evt_df$cfar_pid) <- lapply(split(evt_df$cum_no_hcp_modified,evt_df$cfar_pid),FUN=function(y){ return(max(y)) }) # In the non-event data no_evt_df$max_cum_no_hcp <- NA split(no_evt_df$max_cum_no_hcp,no_evt_df$cfar_pid) <- lapply(split(no_evt_df$cum_no_hcp_modified,no_evt_df$cfar_pid),FUN=function(y){ return(max(y)) }) # ----- Find the max cumulative time on HCP ---- # # In the non-event data no_evt_df$max_cum_hcp <- NA split(no_evt_df$max_cum_hcp, no_evt_df$cfar_pid) <- lapply(split(no_evt_df$new_cum_hcp,no_evt_df$cfar_pid),FUN=function(y){ return(max(y)) }) # Calculating incidences of nade of interest if(nade_of_interest[1] == 'DEATH'){ num_evt_hc <- length(which(evt_df$hcp_ind == 1)) } else { num_evt_hc <- length(which(!is.na(evt_df$nade_category_new) & evt_df$hcp_ind == 1)) + length(which(!is.na(evt_df$nade_category2_new[which(evt_df$nade_category2_new != 'DEATH')]) & evt_df$hcp_ind[which(evt_df$nade_category2_new != 'DEATH')] == 1)) } fu_on_hc <- sum(evt_df$new_cum_hcp) + sum(no_evt_df$max_cum_hcp[!duplicated(no_evt_df$cfar_pid)]) # sub_no_evt_df <- subset(no_evt_df, hcp_ind == 0) if(nade_of_interest[1] == 'DEATH'){ num_evt_no_hc <- length(which(evt_df$hcp_ind == 0)) } else { num_evt_no_hc <- length(which(!is.na(evt_df$nade_category_new) & evt_df$hcp_ind == 0)) + length(which(!is.na(evt_df$nade_category2_new[which(evt_df$nade_category2_new != 'DEATH')]) & evt_df$hcp_ind[which(evt_df$nade_category2_new != 'DEATH')] == 0)) } fu_no_hc <- sum(evt_df$max_cum_no_hcp) + sum(no_evt_df$max_cum_no_hcp[!duplicated(no_evt_df$cfar_pid)]) # For those on HC (number of nade_of_interest while on HC/(sum of time on HC for those with event and those without event) incidence_hc <- format(round(num_evt_hc/fu_on_hc*1000,2),nsmall=2) incidence_no_hc <- format(round(num_evt_no_hc/fu_no_hc*1000,2), nsmall=2) fu_on_hc <- format(round(fu_on_hc,2),nsmall=2) fu_no_hc <- format(round(fu_no_hc,2),nsmall=2) return(list(incidence_hc=incidence_hc, num_evt_hc = num_evt_hc, tot_evts_hc = tot_evts_hc, fu_on_hc = fu_on_hc, incidence_no_hc=incidence_no_hc, num_evt_no_hc = num_evt_no_hc, tot_evts_no_hc = tot_evts_no_hc, fu_no_hc = fu_no_hc)) } cardio_incidence <- incidence_function(time.df,nade_of_interest='CARDIOVASCULAR OR CEREBROVASCULAR') hepatic_incidence <- incidence_function(time.df,nade_of_interest='HEPATIC') mh_incidence <- incidence_function(time.df, nade_of_interest='MENTAL HEALTH') metabolic_incidence <- incidence_function(time.df, nade_of_interest='METABOLIC') renal_incidence <- incidence_function(time.df, nade_of_interest='RENAL') any_nade_incidence <- incidence_function(time.df, nade_of_interest=c('CARDIOVASCULAR OR CEREBROVASCULAR','HEPATIC','METABOLIC','MALIGNANCY','RENAL')) any_mh_nade_no_sa_incidence <- incidence_function(time.df, nade_of_interest=c('CARDIOVASCULAR OR CEREBROVASCULAR','HEPATIC','MENTAL HEALTH','METABOLIC','MALIGNANCY','RENAL')) # Calculating incidence rates for death. Will fill in nade_category_new with DEATH; for the subject who had a value at nade_category_new, will put it in nade_category2_new which.rows <- which(time.df$month == time.df$month_at_death) time.df$nade_category[which.rows] <- ifelse(is.na(time.df$nade_category[which.rows]),'DEATH',time.df$nade_category[which.rows]) time.df$nade_category_new[which.rows] <- ifelse(is.na(time.df$nade_category_new[which.rows]),'DEATH',time.df$nade_category_new[which.rows]) time.df$nade_category2_new[which.rows] <- ifelse(is.na(time.df$nade_category2_new[which.rows]),'DEATH',time.df$nade_category2_new[which.rows]) time.df$nade_category2_new <- ifelse(!is.na(time.df$nade_category_new) & time.df$nade_category_new == 'DEATH',NA, time.df$nade_category2_new) death_incidence <- incidence_function(time.df, nade_of_interest='DEATH') # Alternate way of computing IRR incidence_function2 <- function(df,nade_of_interest){ evt_df <- subset(df, nade_category_new %in% nade_of_interest | nade_category2_new %in% nade_of_interest) evt_df <- evt_df[order(evt_df$cfar_pid,evt_df$age),] evt_df$nade_category_new <- ifelse(evt_df$nade_category_new %nin% nade_of_interest,NA,evt_df$nade_category_new) evt_df$nade_category2_new <- ifelse(evt_df$nade_category2_new %nin% nade_of_interest,NA,evt_df$nade_category2_new) which.duplicated <- which(duplicated(evt_df$cfar_pid)) num_evt_hc <- length(which(!is.na(evt_df$nade_category_new) & evt_df$hcp_ind == 1)) + length(which(!is.na(evt_df$nade_category2_new) & evt_df$hcp_ind == 1)) fu_on_hc <- sum(df$max_cum_hcp[!duplicated(df$cfar_pid)]) incidence_hc <- format(round(num_evt_hc/fu_on_hc*1000,2),nsmall=2) num_evt_no_hc <- length(which(!is.na(evt_df$nade_category_new) & evt_df$hcp_ind == 0)) + length(which(!is.na(evt_df$nade_category2_new) & evt_df$hcp_ind == 0)) fu_no_hc <- sum(df$max_cum_no_hcp[!duplicated(df$cfar_pid)]) incidence_no_hc <- format(round(num_evt_no_hc/fu_no_hc*1000,2),nsmall=2) fu_on_hc <- format(round(fu_on_hc,2),nsmall=2) fu_no_hc <- format(round(fu_no_hc,2),nsmall=2) return(list(num_evt_hc = num_evt_hc, incidence_hc = incidence_hc, fu_on_hc = fu_on_hc, num_evt_no_hc = num_evt_no_hc, incidence_no_hc = incidence_no_hc, fu_no_hc = fu_no_hc)) } time.df$max_cum_hcp <- NA split(time.df$max_cum_hcp, time.df$cfar_pid) <- lapply(split(time.df$new_cum_hcp,time.df$cfar_pid),FUN=function(y){ max(y) }) time.df$max_cum_no_hcp <- NA split(time.df$max_cum_no_hcp, time.df$cfar_pid) <- lapply(split(time.df$cum_no_hcp, time.df$cfar_pid), FUN=function(y){ max(y) }) cardio_incidence2 <- incidence_function2(time.df,nade_of_interest='CARDIOVASCULAR OR CEREBROVASCULAR') hepatic_incidence2 <- incidence_function2(time.df,nade_of_interest='HEPATIC') mh_incidence2 <- incidence_function2(time.df, nade_of_interest='MENTAL HEALTH') metabolic_incidence2 <- incidence_function2(time.df, nade_of_interest='METABOLIC') renal_incidence2 <- incidence_function2(time.df, nade_of_interest='RENAL') any_nade_incidence2 <- incidence_function2(time.df, nade_of_interest=c('CARDIOVASCULAR OR CEREBROVASCULAR','HEPATIC','METABOLIC','MALIGNANCY','RENAL')) any_mh_nade_no_sa_incidence2 <- incidence_function2(time.df, nade_of_interest=c('CARDIOVASCULAR OR CEREBROVASCULAR','HEPATIC','MENTAL HEALTH','METABOLIC','MALIGNANCY','RENAL')) death_incidence2 <- incidence_function2(time.df, nade_of_interest='DEATH') incidence_rate_df <- data.frame('NADE Category'=c('CARDIOVASCULAR OR CEREBROVASCULAR', 'HEPATIC', 'MENTAL HEALTH', 'METABOLIC', 'RENAL', 'Any NADE', 'Any NADE (including mental health)', 'Death'), 'on_hc'=c(paste(cardio_incidence[['incidence_hc']],' (',cardio_incidence[['num_evt_hc']],')',sep=''), paste(hepatic_incidence[['incidence_hc']],' (',hepatic_incidence[['num_evt_hc']],')',sep=''), paste(mh_incidence[['incidence_hc']],' (',mh_incidence[['num_evt_hc']],')',sep=''), paste(metabolic_incidence[['incidence_hc']],' (',metabolic_incidence[['num_evt_hc']],')',sep=''), paste(renal_incidence[['incidence_hc']],' (',renal_incidence[['num_evt_hc']],')',sep=''), paste(any_nade_incidence[['incidence_hc']],' (',any_nade_incidence[['num_evt_hc']],')',sep=''), paste(any_mh_nade_no_sa_incidence[['incidence_hc']],' (',any_mh_nade_no_sa_incidence[['num_evt_hc']],')',sep=''), paste(death_incidence[['incidence_hc']],' (',death_incidence[['num_evt_hc']],')',sep='')), 'not_on_hc'=c(paste(cardio_incidence[['incidence_no_hc']],' (',cardio_incidence[['num_evt_no_hc']],')',sep=''), paste(hepatic_incidence[['incidence_no_hc']],' (',hepatic_incidence[['num_evt_no_hc']],')',sep=''), paste(mh_incidence[['incidence_no_hc']],' (',mh_incidence[['num_evt_no_hc']],')',sep=''), paste(metabolic_incidence[['incidence_no_hc']],' (',metabolic_incidence[['num_evt_no_hc']],')',sep=''), paste(renal_incidence[['incidence_no_hc']],' (',renal_incidence[['num_evt_no_hc']],')',sep=''), paste(any_nade_incidence[['incidence_no_hc']],' (',any_nade_incidence[['num_evt_no_hc']],')',sep=''), paste(any_mh_nade_no_sa_incidence[['incidence_no_hc']],' (',any_mh_nade_no_sa_incidence[['num_evt_no_hc']],')',sep=''), paste(death_incidence[['incidence_no_hc']],' (',death_incidence[['num_evt_no_hc']],')',sep='')), 'on_hc2'=c(paste(cardio_incidence2[['incidence_hc']],' (',cardio_incidence2[['num_evt_hc']],')',sep=''), paste(hepatic_incidence2[['incidence_hc']],' (',hepatic_incidence2[['num_evt_hc']],')',sep=''), paste(mh_incidence2[['incidence_hc']],' (',mh_incidence2[['num_evt_hc']],')',sep=''), paste(metabolic_incidence2[['incidence_hc']],' (',metabolic_incidence2[['num_evt_hc']],')',sep=''), paste(renal_incidence2[['incidence_hc']],' (',renal_incidence2[['num_evt_hc']],')',sep=''), paste(any_nade_incidence2[['incidence_hc']],' (',any_nade_incidence2[['num_evt_hc']],')',sep=''), paste(any_mh_nade_no_sa_incidence2[['incidence_hc']],' (',any_mh_nade_no_sa_incidence2[['num_evt_hc']],')',sep=''), paste(death_incidence2[['incidence_hc']],' (',death_incidence2[['num_evt_hc']],')',sep='')), 'not_on_hc2'=c(paste(cardio_incidence2[['incidence_no_hc']],' (',cardio_incidence2[['num_evt_no_hc']],')',sep=''), paste(hepatic_incidence2[['incidence_no_hc']],' (',hepatic_incidence2[['num_evt_no_hc']],')',sep=''), paste(mh_incidence2[['incidence_no_hc']],' (',mh_incidence2[['num_evt_no_hc']],')',sep=''), paste(metabolic_incidence2[['incidence_no_hc']],' (',metabolic_incidence2[['num_evt_no_hc']],')',sep=''), paste(renal_incidence2[['incidence_no_hc']],' (',renal_incidence2[['num_evt_no_hc']],')',sep=''), paste(any_nade_incidence2[['incidence_no_hc']],' (',any_nade_incidence2[['num_evt_no_hc']],')',sep=''), paste(any_mh_nade_no_sa_incidence2[['incidence_no_hc']],' (',any_mh_nade_no_sa_incidence2[['num_evt_no_hc']],')',sep=''), paste(death_incidence2[['incidence_no_hc']],' (',death_incidence2[['num_evt_no_hc']],')',sep=''))) incidence_rate_df <- as.matrix(incidence_rate_df) colnames(incidence_rate_df) <- c('NADE Category','On HC','Not on HC','On HC','Not on HC') incidence_rate_df2 <- data.frame('NADE Category'=c('CARDIOVASCULAR OR CEREBROVASCULAR', 'HEPATIC', 'MENTAL HEALTH', 'METABOLIC', 'RENAL', 'Any NADE', 'Any NADE (including mental health)', 'Death'), 'on_hc2'=c(paste(cardio_incidence2[['incidence_hc']],' (',cardio_incidence2[['num_evt_hc']],')',sep=''), paste(hepatic_incidence2[['incidence_hc']],' (',hepatic_incidence2[['num_evt_hc']],')',sep=''), paste(mh_incidence2[['incidence_hc']],' (',mh_incidence2[['num_evt_hc']],')',sep=''), paste(metabolic_incidence2[['incidence_hc']],' (',metabolic_incidence2[['num_evt_hc']],')',sep=''), paste(renal_incidence2[['incidence_hc']],' (',renal_incidence2[['num_evt_hc']],')',sep=''), paste(any_nade_incidence2[['incidence_hc']],' (',any_nade_incidence2[['num_evt_hc']],')',sep=''), paste(any_mh_nade_no_sa_incidence2[['incidence_hc']],' (',any_mh_nade_no_sa_incidence2[['num_evt_hc']],')',sep=''), paste(death_incidence2[['incidence_hc']],' (',death_incidence2[['num_evt_hc']],')',sep='')), 'not_on_hc2'=c(paste(cardio_incidence2[['incidence_no_hc']],' (',cardio_incidence2[['num_evt_no_hc']],')',sep=''), paste(hepatic_incidence2[['incidence_no_hc']],' (',hepatic_incidence2[['num_evt_no_hc']],')',sep=''), paste(mh_incidence2[['incidence_no_hc']],' (',mh_incidence2[['num_evt_no_hc']],')',sep=''), paste(metabolic_incidence2[['incidence_no_hc']],' (',metabolic_incidence2[['num_evt_no_hc']],')',sep=''), paste(renal_incidence2[['incidence_no_hc']],' (',renal_incidence2[['num_evt_no_hc']],')',sep=''), paste(any_nade_incidence2[['incidence_no_hc']],' (',any_nade_incidence2[['num_evt_no_hc']],')',sep=''), paste(any_mh_nade_no_sa_incidence2[['incidence_no_hc']],' (',any_mh_nade_no_sa_incidence2[['num_evt_no_hc']],')',sep=''), paste(death_incidence2[['incidence_no_hc']],' (',death_incidence2[['num_evt_no_hc']],')',sep=''))) incidence_rate_df2 <- as.matrix(incidence_rate_df2) colnames(incidence_rate_df2) <- c('NADE Category','On HC','Not on HC') # Calculating incidence from actual ages and not artifical ones from monthly data frame mh_nade_long$age_after_year_gap <- sub_ages[match(mh_nade_long$cfar_pid,sub_ages$cfar_pid,nomatch=NA),'age_after_year_gap'] mh_nade_long$last_age <- time.df[match(mh_nade_long$cfar_pid,time.df$cfar_pid,nomatch=NA),'last_age'] jk <- subset(mh_nade_long, mh_nade_category != 'SUBSTANCE ABUSE' & cfar_pid %in% time.df$cfar_pid & age_mh_nade <= last_age & (is.na(age_after_year_gap) | age_mh_nade < age_after_year_gap)) jk$pid.age <- paste(jk$cfar_pid,jk$age_mh_nade,sep='.') dup.ids <- unique(jk$pid.age[which(duplicated(jk$pid.age))]) jk$dup.ids <- 0 jk$dup.ids[which(jk$pid.age %in% dup.ids)] <- 1 tmp <- subset(jk, dup.ids == 1) tmp <- tmp[order(tmp$cfar_pid, tmp$age_mh_nade),] tmp$nade <- NA split(tmp$nade,tmp$cfar_pid) <- lapply(split(tmp$cfar_pid,tmp$cfar_pid),FUN=function(y){seq(1,length(y))}) tmp.wide <- reshape(subset(tmp,select=c(cfar_pid,age_mh_nade,mh_nade_category,age_at_t0,last_age,pid.age,nade)), v.names='mh_nade_category',idvar='pid.age',timevar='nade',direction='wide') names(tmp.wide)[6:7] <- c('nade_category_new','nade_category2_new') tmp.wide$nade_category_new <- ifelse(tmp.wide$nade_category_new %in% c('CARDIOVASCULAR','CEREBROVASCULAR'),'CARDIOVASCULAR OR CEREBROVASCULAR',tmp.wide$nade_category_new) tmp.wide$nade_category2_new <- ifelse(tmp.wide$nade_category2_new %in% c('CARDIOVASCULAR','CEREBROVASCULAR'),'CARDIOVASCULAR OR CEREBROVASCULAR',tmp.wide$nade_category2_new) jk <- subset(jk, dup.ids == 0) jk$nade_category_new <- ifelse(!is.na(jk$mh_nade_category) & jk$mh_nade_category %in% c('CARDIOVASCULAR','CEREBROVASCULAR'),'CARDIOVASCULAR OR CEREBROVASCULAR',jk$mh_nade_category) jk.tmp <- merge(jk,tmp.wide,all=TRUE) hcp_total$age_after_year_gap <- time_diff.df[match(hcp_total$cfar_pid,time_diff.df$cfar_pid,nomatch=NA),'age_after_year_gap'] hcp_total$last_age <- time.df[match(hcp_total$cfar_pid,time.df$cfar_pid,nomatch=NA),'last_age'] hcp_total$age_at_death <- time.df[match(hcp_total$cfar_pid,time.df$cfar_pid,nomatch=NA),'age_at_death'] hcp_total$last_age <- ifelse(!is.na(hcp_total$age_at_death) & (!is.na(hcp_total$age_after_year_gap) & hcp_total$age_at_death < hcp_total$age_after_year_gap | is.na(hcp_total$age_after_year_gap)), hcp_total$age_at_death,hcp_total$last_age) hcp_total$hcp_diff <- apply(subset(hcp_total, select=c(age_at_hcp_start_date_modified, age_at_hcp_end_date,age_after_year_gap,last_age)),1,FUN=function(y){ if(y['age_at_hcp_start_date_modified'] > y['last_age']){ y['age_at_hcp_start_date_modified'] <- y['age_at_hcp_end_date'] <- NA } if(!is.na(y['age_at_hcp_end_date']) & y['age_at_hcp_end_date'] > y['last_age']){ y['age_at_hcp_end_date'] <- y['last_age'] } if(!is.na(y['age_after_year_gap']) & y['age_at_hcp_start_date_modified'] >= y['age_after_year_gap']){ y['age_at_hcp_start_date_modified'] <- y['age_at_hcp_end_date'] <- NA } if(!is.na(y['age_after_year_gap']) & !is.na(y['age_at_hcp_end_date']) & y['age_at_hcp_end_date'] >= y['age_after_year_gap']){ y['age_at_hcp_end_date'] <- y['last_age'] } hcp_diff <- y['age_at_hcp_end_date'] - y['age_at_hcp_start_date_modified'] if(is.na(hcp_diff)){ hcp_diff <- 0 } return(hcp_diff) }) hcp_total$cum_hcp <- NA split(hcp_total$cum_hcp, hcp_total$cfar_pid) <- lapply(split(hcp_total$hcp_diff,hcp_total$cfar_pid),FUN=function(y){ cumsum(y) }) hcp_total$max_cum_hcp <- NA split(hcp_total$max_cum_hcp,hcp_total$cfar_pid) <- lapply(split(hcp_total$cum_hcp,hcp_total$cfar_pid),FUN=function(y){ max(y)}) jk.tmp2 <- merge(jk.tmp,subset(hcp_total,select=c(cfar_pid,age_at_hcp_start_date_modified,age_at_hcp_end_date,max_cum_hcp,hcp_title_category)),all=TRUE) jk.tmp2$nade_on_hcp <- ifelse(jk.tmp2$age_mh_nade >= jk.tmp2$age_at_hcp_start_date_modified & jk.tmp2$age_mh_nade <= jk.tmp2$age_at_hcp_end_date,1,0) hcp_jk.tmp2 <- subset(jk.tmp2, cfar_pid %in% hcp_total$cfar_pid,select=c(cfar_pid,nade_category_new,nade_category2_new,max_cum_hcp)) time.df$age_after_year_gap <- sub_ages[match(time.df$cfar_pid,sub_ages$cfar_pid,nomatch=NA),'age_after_year_gap'] nhcp_df <- subset(time.df, cfar_pid %in% unique(time.df$cfar_pid)) nhcp_df$last_age <- ifelse(!is.na(nhcp_df$age_at_death),nhcp_df$age_at_death,nhcp_df$last_age) nhcp_df$max_cum_hcp <- hcp_total[match(nhcp_df$cfar_pid, hcp_total$cfar_pid,nomatch=NA),'max_cum_hcp'] nhcp_df$max_cum_hcp <- ifelse(is.na(nhcp_df$max_cum_hcp),0,nhcp_df$max_cum_hcp) nhcp_df$max_cum_no_hcp <- with(nhcp_df, last_age - age_at_t0 - max_cum_hcp) nhcp_df$tdf_max_cum_hcp <- time.df[match(nhcp_df$cfar_pid,time.df$cfar_pid,nomatch=NA),'max_cum_hcp'] nhcp_df$tdf_max_cum_no_hcp <- time.df[match(nhcp_df$cfar_pid,time.df$cfar_pid,nomatch=NA),'max_cum_no_hcp'] nhcp_df$tdf_cum_and_oth <- with(nhcp_df, tdf_max_cum_hcp - max_cum_hcp) nhcp_df$tdf_cum_no_and_oth <- with(nhcp_df, tdf_max_cum_no_hcp - max_cum_no_hcp) # Now adding in max_cum_no_hcp to jk.tmp2 for those IDs in both jk.tmp2$max_cum_no_hcp <- nhcp_df[match(jk.tmp2$cfar_pid,nhcp_df$cfar_pid,nomatch=NA),'max_cum_no_hcp'] jk.tmp3 <- merge(jk.tmp2, subset(nhcp_df, cfar_pid %nin% jk.tmp2$cfar_pid & !duplicated(cfar_pid), select=c(cfar_pid,age_at_t0,last_age,prior_mh_non_ade,prior_mh_non_ade_no_sa,prior_mh_non_ade_only,max_cum_hcp,max_cum_no_hcp)),all=TRUE) jk.tmp3$nade_on_hcp <- ifelse(is.na(jk.tmp3$nade_on_hcp),0,jk.tmp3$nade_on_hcp) jk.tmp3$max_cum_hcp <- ifelse(is.na(jk.tmp3$max_cum_hcp),0,jk.tmp3$max_cum_hcp) jk.tmp3$max_cum_no_hcp <- ifelse(is.na(jk.tmp3$max_cum_no_hcp),0,jk.tmp3$max_cum_no_hcp) jk.tmp3$age_at_death <- time.df[match(jk.tmp3$cfar_pid,time.df$cfar_pid,nomatch=NA),'age_at_death'] jk.tmp3$death_on_hcp <- ifelse(!is.na(jk.tmp3$age_at_death) & !is.na(jk.tmp3$age_at_hcp_start_date_modified) & jk.tmp3$age_at_death >= jk.tmp3$age_at_hcp_start_date_modified & jk.tmp3$age_at_death <= jk.tmp3$age_at_hcp_end_date,1,0) # A few with missing last age so will match it in again jk.tmp3$last_age <- time.df[match(jk.tmp3$cfar_pid, time.df$cfar_pid,nomatch=NA),'last_age'] incidence_function3 <- function(df,nade_of_interest){ if('DEATH' %nin% nade_of_interest){ df$nade_category_new <- ifelse(!is.na(df$nade_category_new) & df$nade_category_new %nin% nade_of_interest, NA, df$nade_category_new) df$nade_category2_new <- ifelse(!is.na(df$nade_category2_new) & df$nade_category2_new %nin% nade_of_interest, NA, df$nade_category2_new) evt_hc_df <- subset(df, !duplicated(pid.age) & !is.na(pid.age) & (nade_category_new %in% nade_of_interest | nade_category2_new %in% nade_of_interest) & nade_on_hcp == 1) num_evt_hc <- length(which(!is.na(evt_hc_df$nade_category_new) & evt_hc_df$nade_category_new %in% nade_of_interest)) + length(which(!is.na(evt_hc_df$nade_category2_new) & evt_hc_df$nade_category_new %in% nade_of_interest)) fu_on_hc <- sum(df$max_cum_hcp[!duplicated(df$cfar_pid)]) incidence_hc <- format(round(num_evt_hc/fu_on_hc*1000,2),nsmall=2) evt_no_hc_df <- subset(df, !duplicated(pid.age) & !is.na(pid.age) & (nade_category_new %in% nade_of_interest | nade_category2_new %in% nade_of_interest) & nade_on_hcp == 0) num_evt_no_hc <- length(which(!is.na(evt_no_hc_df$nade_category_new) & evt_no_hc_df$nade_on_hcp == 0)) + length(which(!is.na(evt_no_hc_df$nade_category2_new) & evt_no_hc_df$nade_on_hcp == 0)) fu_not_on_hc <- sum(df$max_cum_no_hcp[!duplicated(df$cfar_pid)]) incidence_no_hc <- format(round(num_evt_no_hc/fu_not_on_hc*1000,2),nsmall=2) } else { num_evt_hc <- length(which(!is.na(df$age_at_death) & df$death_on_hcp == 1)) fu_on_hc <- sum(df$max_cum_hcp[!duplicated(df$cfar_pid)]) incidence_hc <- format(round(num_evt_hc/fu_on_hc*1000,2),nsmall=2) num_evt_no_hc <- length(which(!is.na(df$age_at_death[!duplicated(df$cfar_pid)]))) - num_evt_hc fu_not_on_hc <- sum(df$max_cum_no_hcp[!duplicated(df$cfar_pid)]) incidence_no_hc <- format(round(num_evt_no_hc/fu_not_on_hc*1000,2),nsmall=2) } fu_on_hc <- round(fu_on_hc,2) fu_not_on_hc <- round(fu_not_on_hc,2) return(list(num_evt_hc = num_evt_hc, incidence_hc = incidence_hc, fu_on_hc = fu_on_hc, num_evt_no_hc = num_evt_no_hc, incidence_no_hc = incidence_no_hc, fu_not_on_hc = fu_not_on_hc)) } cardio_incidence3 <- incidence_function3(df=jk.tmp3,nade_of_interest='CARDIOVASCULAR OR CEREBROVASCULAR') hepatic_incidence3 <- incidence_function3(df=jk.tmp3,nade_of_interest='HEPATIC') mh_incidence3 <- incidence_function3(df=jk.tmp3, nade_of_interest='MENTAL HEALTH') metabolic_incidence3 <- incidence_function3(df=jk.tmp3, nade_of_interest='METABOLIC') # malignancy_incidence3 <- incidence_function3(df=jk.tmp3, nade_of_interest='MALIGNANCY') renal_incidence3 <- incidence_function3(df=jk.tmp3, nade_of_interest='RENAL') any_nade_incidence3 <- incidence_function3(df=jk.tmp3, nade_of_interest=c('CARDIOVASCULAR OR CEREBROVASCULAR','HEPATIC','METABOLIC','MALIGNANCY','RENAL')) any_mh_nade_no_sa_incidence3 <- incidence_function3(df=jk.tmp3, nade_of_interest=c('CARDIOVASCULAR OR CEREBROVASCULAR','HEPATIC','MENTAL HEALTH','METABOLIC','MALIGNANCY','RENAL')) death_incidence3 <- incidence_function3(df=jk.tmp3, nade_of_interest='DEATH') incidence_rate_df3 <- data.frame('NADE Category'=c('CARDIOVASCULAR OR CEREBROVASCULAR', 'HEPATIC', 'MENTAL HEALTH', 'METABOLIC', 'RENAL', 'Any NADE', 'Any NADE (including mental health)', 'Death'), 'on_hc2'=c(paste(cardio_incidence3[['incidence_hc']],' (',cardio_incidence3[['num_evt_hc']],')',sep=''), paste(hepatic_incidence3[['incidence_hc']],' (',hepatic_incidence3[['num_evt_hc']],')',sep=''), paste(mh_incidence3[['incidence_hc']],' (',mh_incidence3[['num_evt_hc']],')',sep=''), paste(metabolic_incidence3[['incidence_hc']],' (',metabolic_incidence3[['num_evt_hc']],')',sep=''), paste(renal_incidence3[['incidence_hc']],' (',renal_incidence3[['num_evt_hc']],')',sep=''), paste(any_nade_incidence3[['incidence_hc']],' (',any_nade_incidence3[['num_evt_hc']],')',sep=''), paste(any_mh_nade_no_sa_incidence3[['incidence_hc']],' (',any_mh_nade_no_sa_incidence3[['num_evt_hc']],')',sep=''), paste(death_incidence3[['incidence_hc']],' (',death_incidence3[['num_evt_hc']],')',sep='')), 'not_on_hc2'=c(paste(cardio_incidence3[['incidence_no_hc']],' (',cardio_incidence3[['num_evt_no_hc']],')',sep=''), paste(hepatic_incidence3[['incidence_no_hc']],' (',hepatic_incidence3[['num_evt_no_hc']],')',sep=''), paste(mh_incidence3[['incidence_no_hc']],' (',mh_incidence3[['num_evt_no_hc']],')',sep=''), paste(metabolic_incidence3[['incidence_no_hc']],' (',metabolic_incidence3[['num_evt_no_hc']],')',sep=''), paste(renal_incidence3[['incidence_no_hc']],' (',renal_incidence3[['num_evt_no_hc']],')',sep=''), paste(any_nade_incidence3[['incidence_no_hc']],' (',any_nade_incidence3[['num_evt_no_hc']],')',sep=''), paste(any_mh_nade_no_sa_incidence3[['incidence_no_hc']],' (',any_mh_nade_no_sa_incidence3[['num_evt_no_hc']],')',sep=''), paste(death_incidence3[['incidence_no_hc']],' (',death_incidence3[['num_evt_no_hc']],')',sep=''))) incidence_rate_df3 <- as.matrix(incidence_rate_df3) colnames(incidence_rate_df3) <- c('NADE Category','On HC','Not on HC') # Creating table for Jessie that has psychiatric diagnoses based on HC exposure, not ever/never use jk.tmp3$nade_on_hcp.factor <- factor(jk.tmp3$nade_on_hcp, levels=c(0,1), labels=c('On hormonal contraception','Not on hormonal contraception')) jk.mh <- subset(jk.tmp3, nade_category_new == 'MENTAL HEALTH' | nade_category2_new == 'MENTAL HEALTH') jk.mh$mh_nade <- jk.tmp[match(jk.mh$pid.age,jk.tmp$pid.age,nomatch=NA),'mh_nade'] jk.mh$mh_nade <- factor(jk.mh$mh_nade) mh_summ. <- summary(nade_on_hcp.factor ~ mh_nade, data=jk.mh, subset=!duplicated(pid.age), method='reverse',overall=TRUE) jk <- subset(mh_nade_long, mh_nade_category == 'MENTAL HEALTH' & cfar_pid %in% time.df$cfar_pid & age_mh_nade <= last_age & (is.na(age_after_year_gap) | age_mh_nade < age_after_year_gap)) jk$pid.age <- paste(jk$cfar_pid,jk$age_mh_nade,sep='.') jk$nade_on_hcp <- jk.mh[match(jk$pid.age,jk.mh$pid.age,nomatch=NA),'nade_on_hcp'] sj <- jk[which(duplicated(jk$pid.age) & jk$mh_nade_category == 'MENTAL HEALTH'),c('cfar_pid','pid.age','mh_nade','mh_nade_category')] sub_sj <- subset(jk, pid.age%in% sj$pid.age & !duplicated(pid.age) & jk$mh_nade_category == 'MENTAL HEALTH',select=c('cfar_pid','pid.age','mh_nade','mh_nade_category')) sj$nade_on_hcp <- jk.mh[match(sj$pid.age,jk.mh$pid.age,nomatch=NA),'nade_on_hcp'] sub_sj$nade_on_hcp <- jk.mh[match(sub_sj$pid.age,jk.mh$pid.age,nomatch=NA),'nade_on_hcp'] sj2 <- rbind(sj, subset(jk, pid.age %nin% sj$pid.age & !duplicated(pid.age), select=c(cfar_pid,pid.age,mh_nade,mh_nade_category,nade_on_hcp))) sj2 <- rbind(sj2,sub_sj) sj2$nade_on_hcp.factor <- factor(sj2$nade_on_hcp, levels=c(0,1), labels=c('Not on hormonal contraception','On hormonal contraception')) sj2$mh_nade <- factor(sj2$mh_nade) mh_summ.2 <- summary(nade_on_hcp.factor ~ mh_nade, data=sj2,method='reverse',overall=TRUE) # Using counts used in models, not total ones no.nades <- paste('NADE (',nade[['msm2']]$freq[2],')',sep='') no.mh.nades_only <- paste('Only mental health NADEs (',mh_nade_only.mod[['msm2']]$freq[2],')',sep='') no.mh.nades <- paste('NADEs including mental health (',mh_nade.mod[['msm2']]$freq[2],')',sep='') no.mh.nades.no.sa <- paste('NADEs including mental health (',mh_nade_no_sa.mod[['msm2']]$freq[2],')',sep='') simple_outcomes <- data.frame('Covariate'=c('Baseline age (rcs)', '~~~35 vs 30', '~~~35 vs 40', '~~~35 vs 50', 'Month (per 6 months)', 'Cumulative HCP use (per year)', 'Year of T0 (per 3 years)', 'Baseline CD4 (per 100)', 'Baseline log viral load (per unit)', 'HC use prior to T0', 'NADE prior to T0', 'Race: White', '~~~Non-white', 'Route of infection: Non-IDU', '~~~IDU', 'On HCP', 'HCV prior to T0'), 'nade_or'=c('',nade_or[1],nade_or2,nade_or3,nade_or[2:6], nade_or[7],nade_or[8], format(1.00,nsmall=2),nade_or[9], format(1.00,nsmall=2),nade_or[10], nade_or[11:12]), 'nade_ci'=c('',nade_ci[1],nade_ci2,nade_ci3,nade_ci[2:6],nade_ci[7],nade_ci[8], '',nade_ci[9],'',nade_ci[10:12]), 'nade_p'=c(nade_p[1],'','','',nade_p[2:9],'',nade_p[10],'',nade_p[11:12]), 'mh_nade_only_or'=c('',mh_nade_only_or[1],mh_nade_only_or2,mh_nade_only_or3, mh_nade_only_or[2:6],mh_nade_only_or[7],mh_nade_only_or[8], format(1.00,nsmall=2),mh_nade_only_or[9],format(1.00,nsmall=2),mh_nade_only_or[10], mh_nade_only_or[11:12]), 'mh_nade_only_ci'=c('',mh_nade_only_ci[1],mh_nade_only_ci2,mh_nade_only_ci3, mh_nade_only_ci[2:6],mh_nade_only_ci[7],mh_nade_only_ci[8], '',mh_nade_only_ci[9],'',mh_nade_only_ci[10:12]), 'mh_nade_only_p'=c(mh_nade_only_p[1],'','','',mh_nade_only_p[2:9],'',mh_nade_only_p[10],'',mh_nade_only_p[11:12]), 'mh_nade_no_sa_or'=c('',mh_nade_no_sa_or[1],mh_nade_no_sa_or2,mh_nade_no_sa_or3,mh_nade_no_sa_or[2:6], mh_nade_no_sa_or[7],mh_nade_no_sa_or[8], format(1.00,nsmall=2),mh_nade_no_sa_or[9], format(1.00,nsmall=2),mh_nade_no_sa_or[10], mh_nade_no_sa_or[11:12]), 'mh_nade_no_sa_ci'=c('',mh_nade_no_sa_ci[1],mh_nade_no_sa_ci2,mh_nade_no_sa_ci3,mh_nade_no_sa_ci[2:6],mh_nade_no_sa_ci[7],mh_nade_no_sa_ci[8], '',mh_nade_no_sa_ci[9],'',mh_nade_no_sa_ci[10:12]), 'mh_nade_no_sa_p'=c(mh_nade_no_sa_p[1],'','','',mh_nade_no_sa_p[2:9],'',mh_nade_no_sa_p[10],'',mh_nade_no_sa_p[11:12])) simple_outcomes <- as.matrix(simple_outcomes) colnames(simple_outcomes) <- c('Covariate',rep(c('OR','95\\% CI','P'),times=3)) no.nade.dth <- paste('NADE/Death (',nade_dth[['msm2']]$freq[2],')',sep='') no.mh.nade_only.dth <- paste('Mental health NADEs/Death (',mh_nade_only_dth[['msm2']]$freq[2],')',sep='') no.mh.nade.dth <- paste('NADE, including all mental health ones (',mh_nade_dth[['msm2']]$freq[2],')',sep='') no.mh.nade.no.sa.dth <- paste('NADE, including mental health/Death (',mh_nade_no_sa_dth[['msm2']]$freq[2],')',sep='') compound_outcomes <- data.frame('Covariate'=c('Baseline age (rcs)', '~~~35 vs 30', '~~~35 vs 40', '~~~35 vs 50', 'Month (per 6 months)', 'Cumulative HCP use (per year)', 'Year of T0 (per 3 years)', 'Baseline CD4 (per 100)', 'Baseline log viral load (per unit)', 'HC use prior to T0', 'NADE prior to T0', 'Race: White', '~~~Non-white', 'Route of infection: Non-IDU', '~~~IDU', 'On HCP', 'HCV prior to T0'), 'nade_dth_or'=c('',nade_dth_or[1],nade_dth_or2,nade_dth_or3,nade_dth_or[2:6], nade_dth_or[7],nade_dth_or[8], format(1.00,nsmall=2),nade_dth_or[9], format(1.00,nsmall=2),nade_dth_or[10], nade_dth_or[11:12]), 'nade_ci'=c('',nade_dth_ci[1],nade_dth_ci2,nade_dth_ci3,nade_dth_ci[2:6],nade_dth_ci[7],nade_dth_ci[8], '',nade_dth_ci[9],'',nade_dth_ci[10:12]), 'nade_p'=c(nade_dth_p[1],'','','',nade_dth_p[2:9],'',nade_dth_p[10],'',nade_dth_p[11:12]), 'mh_nade_only_or'=c('',mh_nade_only_dth_or[1],mh_nade_only_dth_or2,mh_nade_only_dth_or3, mh_nade_only_dth_or[2:6],mh_nade_only_dth_or[7],mh_nade_only_dth_or[8], format(1.00,nsmall=2),mh_nade_only_dth_or[9],format(1.00,nsmall=2),mh_nade_only_dth_or[10], mh_nade_only_dth_or[11:12]), 'mh_nade_only_ci'=c('',mh_nade_only_dth_ci[1],mh_nade_only_dth_ci2,mh_nade_only_dth_ci3,mh_nade_only_dth_ci[2:6], mh_nade_only_dth_ci[7],mh_nade_only_dth_ci[8],'', mh_nade_only_dth_ci[9],'',mh_nade_only_dth_ci[10:12]), 'mh_nade_only_p'=c(mh_nade_only_dth_p[1],'','','',mh_nade_only_dth_p[2:9],'',mh_nade_only_dth_p[10],'', mh_nade_only_dth_p[11:12]), 'mh_nade_no_sa_or'=c('',mh_nade_no_sa_dth_or[1],mh_nade_no_sa_dth_or2,mh_nade_no_sa_dth_or3,mh_nade_no_sa_dth_or[2:6], mh_nade_no_sa_dth_or[7],mh_nade_no_sa_dth_or[8], format(1.00,nsmall=2),mh_nade_no_sa_dth_or[9], format(1.00,nsmall=2),mh_nade_no_sa_dth_or[10], mh_nade_no_sa_dth_or[11:12]), 'mh_nade_no_sa_ci'=c('',mh_nade_no_sa_dth_ci[1],mh_nade_no_sa_dth_ci2,mh_nade_no_sa_dth_ci3,mh_nade_no_sa_dth_ci[2:6],mh_nade_no_sa_dth_ci[7], mh_nade_no_sa_dth_ci[8], '',mh_nade_no_sa_dth_ci[9],'',mh_nade_no_sa_dth_ci[10:12]), 'mh_nade_no_sa_p'=c(mh_nade_no_sa_dth_p[1],'','','',mh_nade_no_sa_dth_p[2:9],'',mh_nade_no_sa_dth_p[10],'',mh_nade_no_sa_dth_p[11:12])) compound_outcomes <- as.matrix(compound_outcomes) colnames(compound_outcomes) <- c('Covariate',rep(c('OR','95\\% CI','P'),times=3)) # Just death as outcome no.of.deaths <- table(df$death_outcome_indicator)[2] death_outcome <- data.frame('Covariate'=c('Cumulative HCP use (per year)','On HCP'), 'OR'=death_simple_or, 'CI'=death_simple_ci, 'P'=death_simple_p) # How many women received HC at any point during followup number.w.hc <- tapply(time.df$hcp_ind, INDEX=time.df$cfar_pid, FUN=function(y){as.numeric(any(y == 1))}) no.nade.depo <- paste('NADE (',nade.depo[['msm2']]$freq[2],')',sep='') no.mh_nade_only.depo <- paste('Mental health NADEs only (',mh_nade_only.depo.mod[['msm2']]$freq[2],')',sep='') no.mh_nade.depo <- paste('NADEs, including mental health ones (',mh_nade.depo.mod[['msm2']]$freq[2],')',sep='') no.mh_nade_no_sa.depo <- paste('NADEs, including mental health ones (',mh_nade_no_sa.depo.mod[['msm2']]$freq[2],')',sep='') simple_depo_outcomes <- data.frame('Covariate'=c('Baseline age (rcs)', '~~~35 vs 30', '~~~35 vs 40', '~~~35 vs 50', 'Month (per 6 months)', 'Cumulative Depo use (per year)', 'Year of T0 (per 3 years)', 'Baseline CD4 (per 100)', 'Baseline log viral load (per unit)', 'HC use prior to T0', 'NADE prior to T0', 'Race: White', '~~~Non-white', 'Route of infection: Non-IDU', '~~~IDU', 'On Depo', 'HCV prior to T0'), 'nade_depo_or'=c('',nade_depo_or[1],nade_depo_or2,nade_depo_or3,nade_depo_or[2:6], nade_depo_or[7],nade_depo_or[8], format(1.00,nsmall=2),nade_depo_or[9], format(1.00,nsmall=2),nade_depo_or[10], nade_depo_or[11:12]), 'nade_depo_ci'=c('',nade_depo_ci[1],nade_depo_ci2,nade_depo_ci3,nade_depo_ci[2:6],nade_depo_ci[7],nade_depo_ci[8], '',nade_depo_ci[9],'',nade_depo_ci[10:12]), 'nade_depo_p'=c(nade_depo_p[1],'','','',nade_depo_p[2:9],'',nade_depo_p[10],'',nade_depo_p[11:12]), 'mh_nade_only_depo_or'=c('',mh_nade_only_depo_or[1],mh_nade_only_depo_or2, mh_nade_only_depo_or3,mh_nade_only_depo_or[2:6], mh_nade_only_depo_or[7],mh_nade_only_depo_or[8], format(1.00,nsmall=2),mh_nade_only_depo_or[9], format(1.00,nsmall=2),mh_nade_only_depo_or[10], mh_nade_only_depo_or[11:12]), 'mh_nade_only_depo_ci'=c('',mh_nade_only_depo_ci[1],mh_nade_only_depo_ci2,mh_nade_only_depo_ci3, mh_nade_only_depo_ci[2:6],mh_nade_only_depo_ci[7], mh_nade_only_depo_ci[8],'',mh_nade_only_depo_ci[9],'',mh_nade_only_depo_ci[10:12]), 'mh_nade_only_depo_p'=c(mh_nade_only_depo_p[1],'','','',mh_nade_only_depo_p[2:9],'',mh_nade_only_depo_p[10], '',mh_nade_only_depo_p[11:12]), 'mh_nade_no_sa_depo_or'=c('',mh_nade_no_sa_depo_or[1],mh_nade_no_sa_depo_or2,mh_nade_no_sa_depo_or3,mh_nade_no_sa_depo_or[2:6], mh_nade_no_sa_depo_or[7],mh_nade_no_sa_depo_or[8], format(1.00,nsmall=2),mh_nade_no_sa_depo_or[9], format(1.00,nsmall=2),mh_nade_no_sa_depo_or[10], mh_nade_no_sa_depo_or[11:12]), 'mh_nade_no_sa_depo_ci'=c('',mh_nade_no_sa_depo_ci[1],mh_nade_no_sa_depo_ci2,mh_nade_no_sa_depo_ci3,mh_nade_no_sa_depo_ci[2:6],mh_nade_no_sa_depo_ci[7], mh_nade_no_sa_depo_ci[8], '',mh_nade_no_sa_depo_ci[9],'',mh_nade_no_sa_depo_ci[10:12]), 'mh_nade_no_sa_depo_p'=c(mh_nade_no_sa_depo_p[1],'','','',mh_nade_no_sa_depo_p[2:9],'',mh_nade_no_sa_depo_p[10],'',mh_nade_no_sa_depo_p[11:12])) simple_depo_outcomes <- as.matrix(simple_depo_outcomes) colnames(simple_depo_outcomes) <- c('Covariate',rep(c('OR','95\\% CI','P'),times=3)) no.nade_dth.depo <- paste('NADE/Death (',nade_dth_depo[['msm2']]$freq[2],')',sep='') no.mh_nade_only_dth.depo <- paste('Mental health NADEs/Death (',mh_nade_only_dth_depo[['msm2']]$freq[2],')',sep='') no.mh_mh_nade_dth.depo <- paste('NADE, including mental health/Death (',mh_nade_dth_depo[['msm2']]$freq[2],')',sep='') no.mh_nade_no_sa_dth.depo <- paste('NADE, including mental health/Death (',mh_nade_no_sa_dth_depo[['msm2']]$freq[2],')',sep='') compound_depo_outcomes <- data.frame('Covariate'=c('Baseline age (rcs)', '~~~35 vs 30', '~~~35 vs 40', '~~~35 vs 50', 'Month (per 6 months)', 'Cumulative Depo use (per year)', 'Year of T0 (per 3 years)', 'Baseline CD4 (per 100)', 'Baseline log viral load (per unit)', 'HC use prior to T0', 'NADE prior to T0', 'Race: White', '~~~Non-white', 'Route of infection: Non-IDU', '~~~IDU', 'On Depo', 'HCV prior to T0'), 'nade_dth_depo_or'=c('',nade_dth_depo_or[1],nade_dth_depo_or2,nade_dth_depo_or3, nade_dth_depo_or[2:6], nade_dth_depo_or[7],nade_dth_depo_or[8], format(1.00,nsmall=2),nade_dth_depo_or[9], format(1.00,nsmall=2),nade_dth_depo_or[10], nade_dth_depo_or[11:12]), 'nade_dth_depo_ci'=c('',nade_dth_depo_ci[1],nade_dth_depo_ci2,nade_dth_depo_ci3,nade_dth_depo_ci[2:6], nade_dth_depo_ci[7],nade_dth_depo_ci[8],'',nade_dth_depo_ci[9],'',nade_dth_depo_ci[10:12]), 'nade_dth_depo_p'=c(nade_dth_depo_p[1],'','','',nade_dth_depo_p[2:9],'',nade_dth_depo_p[10],'', nade_dth_depo_p[11:12]), 'mh_nade_only_dth_depo_or'=c('',mh_nade_only_dth_depo_or[1],mh_nade_only_dth_depo_or2, mh_nade_only_dth_depo_or3,mh_nade_only_dth_depo_or[2:6], mh_nade_only_dth_depo_or[7],mh_nade_only_dth_depo_or[8], format(1.00,nsmall=2),mh_nade_only_dth_depo_or[9], format(1.00,nsmall=2),mh_nade_only_dth_depo_or[10],mh_nade_only_dth_depo_or[11:12]), 'mh_nade_only_dth_depo_ci'=c('',mh_nade_only_dth_depo_ci[1],mh_nade_only_dth_depo_ci2,mh_nade_only_dth_depo_ci3, mh_nade_only_dth_depo_ci[2:6],mh_nade_only_dth_depo_ci[7], mh_nade_only_dth_depo_ci[8],'',mh_nade_only_dth_depo_ci[9],'', mh_nade_only_dth_depo_ci[10:12]), 'mh_nade_only_dth_depo_p'=c(mh_nade_only_dth_depo_p[1],'','','',mh_nade_only_dth_depo_p[2:9],'', mh_nade_only_dth_depo_p[10],'',mh_nade_only_dth_depo_p[11:12]), 'mh_nade_no_sa_dth_depo_or'=c('',mh_nade_no_sa_dth_depo_or[1],mh_nade_no_sa_dth_depo_or2,mh_nade_no_sa_dth_depo_or3, mh_nade_no_sa_dth_depo_or[2:6], mh_nade_no_sa_dth_depo_or[7],mh_nade_no_sa_dth_depo_or[8], format(1.00,nsmall=2),mh_nade_no_sa_dth_depo_or[9], format(1.00,nsmall=2),mh_nade_no_sa_dth_depo_or[10], mh_nade_no_sa_dth_depo_or[11:12]), 'mh_nade_no_sa_dth_depo_ci'=c('',mh_nade_no_sa_dth_depo_ci[1],mh_nade_no_sa_dth_depo_ci2,mh_nade_no_sa_dth_depo_ci3,mh_nade_no_sa_dth_depo_ci[2:6], mh_nade_no_sa_dth_depo_ci[7],mh_nade_no_sa_dth_depo_ci[8], '',mh_nade_no_sa_dth_depo_ci[9],'',mh_nade_no_sa_dth_depo_ci[10:12]), 'mh_nade_no_sa_dth_depo_p'=c(mh_nade_no_sa_dth_depo_p[1],'','','',mh_nade_no_sa_dth_depo_p[2:9],'',mh_nade_no_sa_dth_depo_p[10],'', mh_nade_no_sa_dth_depo_p[11:12])) compound_depo_outcomes <- as.matrix(compound_depo_outcomes) colnames(compound_depo_outcomes) <- c('Covariate',rep(c('OR','95\\% CI','P'),times=3)) # Just death as outcome death_depo_outcome <- data.frame('Covariate'=c('Cumulative Depo use (per year)','On Depo'), 'OR'=death_simple_depo_or, 'CI'=death_simple_depo_ci, 'P'=death_simple_depo_p) cj <- subset(time.df, !duplicated(cfar_pid)) cj$for_exposure_time <- with(cj, last_age - age_at_t0) sum(cj$for_exposure_time[which(cj$any_hc_use.factor == 'No HC use')]) sum(cj$for_exposure_time[which(cj$any_hc_use.factor == 'HC use')])