\documentclass[10pt]{article} \bibliographystyle{cj} \usepackage{longtable} % allows tables to break \usepackage[pdftex]{lscape} % allows tables to be landscaped %\usepackage[pdftex]{graphicx} \usepackage{pdfpages} \usepackage[letterpaper,margin=1in]{geometry} \usepackage{setspace,relsize} % needed for latex(describe()), \code \usepackage{moreverb} % for verbatimtabinput % \usepackage{tabularx} %\usepackage{colortbl} %\usepackage{placeins} %\usepackage[dotinlabels]{titletoc} %\usepackage{amsmath} %\usepackage{amsfonts} \usepackage{amssymb} \usepackage{fancyhdr} \usepackage{graphicx, subfigure} %\usepackage{vmargin} \usepackage{lscape} \usepackage{rotating} \usepackage{endnotes} \usepackage{color} \def\linkcol{blue} % \usepackage{tabularx} % required in the preamble %\setpapersize{USletter} % \usepackage{tabularx} % required in the preamble %\setpapersize{USletter} \begin{document} %Define the sub directory for placement of graphics \SweaveOpts{prefix.string=pspdf/nonparm} %Define all graphics to be 100\% of their width \setkeys{Gin}{width=1.0\textwidth} %default is 0.8 %\def\printid{1} <>= #Clear existing data rm(list=ls()) #----------------------------------# # Loading libraries # #----------------------------------# library(foreign) library(Hmisc) library(rms) library(survival) library(cmprsk) #---------------------------------------# # Functions # #---------------------------------------# # Function to do ADE and ADE/Death analyses for a given subgroup of subjects source("modelfun.R") #------------------------------------# # Summary.fun # #------------------------------------# # Creates different summaries for the adjusted models summary.fun <- function(mod,cov,risk.levels){ # Comparing CD4's if(cov == "go"){ summ.50 <- summary(mod, artcc_origin_g.factor = "WEST", gender.factor = "Male", cd4_t0_corrected = c(200,50), lrna_t0_corrected = c(5,4), #hemoglobin_t0_corrected=c(12,13), age = c(40,20), pi_based.factor = "No", nnrti_based.factor = "No", # other_based.factor = "No", haartyr = c(2001,1998), risk.factor = risk.levels[1], stagec.factor = "No") } else if (cov == "nb"){ summ.50 <- summary(mod, native.born.factor = "Yes", gender.factor = "Male", cd4_t0_corrected = c(200,50), lrna_t0_corrected = c(5,4), #hemoglobin_t0_corrected=c(12,13), age = c(40,20), pi_based.factor = "No", nnrti_based.factor = "No", # other_based.factor = "No", haartyr = c(2001,1998), risk.factor = risk.levels[1], stagec.factor = "No") } else if (cov == "race"){ summ.50 <- summary(mod, combined.factor = "White", gender.factor = "Male", cd4_t0_corrected = c(200,50), lrna_t0_corrected = c(5,4), #hemoglobin_t0_corrected=c(13,12), age = c(40,20), pi_based.factor = "No", nnrti_based.factor = "No", # other_based.factor = "No", haartyr = c(2001,1998), risk.factor = risk.levels[1], stagec.factor = "No") } summ.100 <- summary(mod, est.all=FALSE, cd4_t0_corrected = c(200,100)) summ.350 <- summary(mod, est.all=FALSE, cd4_t0_corrected = c(200,350)) summ.500 <- summary(mod, est.all=FALSE, cd4_t0_corrected = c(200,500)) # Comparing log viral loads summ.vl.6 <- summary(mod, est.all=FALSE, lrna_t0_corrected = c(5,6)) # Comparing ages summ.age.30 <- summary(mod, est.all=FALSE, age = c(40,30)) summ.age.50 <- summary(mod, est.all=FALSE, age = c(40,50)) summ.age.60 <- summary(mod, est.all=FALSE, age = c(40,60)) # year of HAART start summ.haart.2004 <- summary(mod, est.all=FALSE, haartyr = c(2001,2004)) summ.haart.2006 <- summary(mod, est.all=FALSE, haartyr = c(2001,2006)) return(list(summ.50=summ.50, summ.100=summ.100, summ.350=summ.350, summ.500=summ.500, summ.vl.6=summ.vl.6, #summ.hg.14=summ.hg.14, summ.age.30=summ.age.30, summ.age.50=summ.age.50, summ.age.60=summ.age.60, summ.haart.2004=summ.haart.2004, summ.haart.2006=summ.haart.2006))} #----------------------------------# # Format.fun # #----------------------------------# # Formats the summaries into a data frame. This data frame still needs to be adjusted to incorporate format.fun <- function(summ.,mod,cov,risk.levels,main.levels){ mod.orig <- summ.[[1]] order.rows <- vector(mode="numeric",length=(length(rownames(mod.orig))/2)) if(cov == "go"){ main.levels2 <- paste("artcc_origin_g.factor - ",main.levels[2:length(main.levels)],":",main.levels[1],sep="") } else if (cov == "nb"){ main.levels2 <- paste("native.born.factor - ",main.levels[2:length(main.levels)],":",main.levels[1],sep="") } else if (cov == "race"){ main.levels2 <- paste("combined.factor - ",main.levels[2:length(main.levels)],":",main.levels[1],sep="") } order.rows[1:(length(main.levels)-1)] <- which(rownames(mod.orig) %in% main.levels2) order.rows[length(main.levels):(length(main.levels) + 6)] <- c(which(rownames(mod.orig) == "gender.factor - Female:Male"), which(rownames(mod.orig) == "cd4_t0_corrected"), which(rownames(mod.orig) == "lrna_t0_corrected"), which(rownames(mod.orig) == "age"), which(rownames(mod.orig) == "pi_based.factor - Yes:No"), which(rownames(mod.orig) == "nnrti_based.factor - Yes:No"), which(rownames(mod.orig) == "haartyr")) risk.levels2 <- paste("risk.factor - ",risk.levels[2:length(risk.levels)],":",risk.levels[1],sep="") order.rows[(length(main.levels)+7):(length(main.levels)+6+(length(risk.levels)-1))] <- which(rownames(mod.orig) %in% risk.levels2) order.rows[length(order.rows)] <- which(rownames(mod.orig) == "stagec.factor - Yes:No") df <- round(mod.orig[,c("Effect","Lower 0.95","Upper 0.95")],2) ci <- paste("(",df[,"Lower 0.95"],", ",df[,"Upper 0.95"],")",sep="") df <- cbind(df[,1],ci) df <- df[(order.rows+1),] summ.p <- anova(mod)[,"P"] summ.p <- ifelse(summ.p < 0.001, "< 0.001", ifelse(summ.p < 0.01 & summ.p >= 0.001, round(summ.p,3), round(summ.p,2))) nonlinear.p <- summ.p[(length(summ.p)-1)] summ.p <- summ.p[names(summ.p) %nin% c(" Nonlinear","TOTAL NONLINEAR","TOTAL")] summ.p <- c(summ.p[1],rep("",times=(length(main.levels)-2)),summ.p[2:9],rep("",times=(length(risk.levels)-2)), summ.p[10]) df <- cbind(df,summ.p) colnames(df) <- c("Hazard Ratio","95\\% CI","P") rownames.mod.orig <- rownames(mod.orig) rownames.mod.orig <- rownames.mod.orig[order.rows] rownames(df) <- rownames.mod.orig which.cd4 <- which(rownames(df) == "cd4_t0_corrected") which.vl <- which(rownames(df) == "lrna_t0_corrected") which.age <- which(rownames(df) == "age") which.haart <- which(rownames(df) == "haartyr") df <- rbind(df[1:which.cd4,], c(round(summ.[[2]][2,4],2),paste("(",round(summ.[[2]][2,6],2),", ",round(summ.[[2]][2,7],2),")",sep=""),""), c(round(summ.[[3]][2,4],2),paste("(",round(summ.[[3]][2,6],2),", ",round(summ.[[3]][2,7],2),")",sep=""),""), c(round(summ.[[4]][2,4],2),paste("(",round(summ.[[4]][2,6],2),", ",round(summ.[[4]][2,7],2),")",sep=""),""), df[(which.cd4+1):which.vl,], c(round(summ.[[5]][2,4],2),paste("(",round(summ.[[5]][2,6],2),", ",round(summ.[[5]][2,7],2),")",sep=""),""), df[(which.vl+1):which.age,], c(round(summ.[[6]][2,4],2),paste("(",round(summ.[[6]][2,6],2),", ",round(summ.[[6]][2,7],2),")",sep=""),""), c(round(summ.[[7]][2,4],2),paste("(",round(summ.[[7]][2,6],2),", ",round(summ.[[7]][2,7],2),")",sep=""),""), c(round(summ.[[8]][2,4],2),paste("(",round(summ.[[8]][2,6],2),", ",round(summ.[[8]][2,7],2),")",sep=""),""), df[(which.age+1):which.haart,], c(round(summ.[[9]][2,4],2),paste("(",round(summ.[[9]][2,6],2),", ",round(summ.[[9]][2,7],2),")",sep=""),""), c(round(summ.[[10]][2,4],2),paste("(",round(summ.[[10]][2,6],2),", ",round(summ.[[10]][2,7],2),")",sep=""),""), df[(which.haart+1):dim(df)[1],]) if(cov == "go"){ covariate <- c("Northern Africa and Middle East","Sub-Saharan Africa", "Asia","Latin America") } else if (cov == "nb"){ covariate <- "Non-native born" } else if (cov == "race"){ covariate <- c("Black","Hispanic","Asian","Arab","Indigenous") } df <- cbind("Covariate"=c(covariate,c("Female","Baseline CD4 (50 vs 200)","Baseline CD4 (100 vs 200)", "Baseline CD4 (350 vs 200)","Baseline CD4 (500 vs 200)", "Baseline log viral load (4 vs 5)","Baseline log viral load (6 vs 5)", "Age (20 vs 40)","Age (30 vs 40)","Age (50 vs 40)","Age (60 vs 40)", "PI-based initial ART","NNRTI-based initial ART", "Year of HAART start (1998 vs 2001)","Year of HAART start (2004 vs 2001)", "Year of HAART start (2006 vs 2001)"), paste("Risk factor -- ",risk.levels[2:length(risk.levels)],sep=""),"Prior ADE"), df) return(list(df=df, nonlinear.p=nonlinear.p)) } #----------------------------------# # Loading data # #----------------------------------# # Baseline data baseline <- read.dta("baselinedata_for_cathy.dta",convert.factors=FALSE,convert.underscore=FALSE) # Long format with hemoglobin data hemoglobin <- read.dta("haemoglobin_for_cathy.dta",convert.factors=FALSE,convert.underscore=FALSE) origin <- read.dta("info_origin_race_2011.dta",convert.factors=FALSE,convert.underscore=FALSE) # Long format with CD4 data cd4.data <- read.dta("t22_measurements_lab_cd4.dta",convert.factors=FALSE,convert.underscore=FALSE) # Long format with viral load data vl.data <- read.dta("t24_measurements_lab_rna.dta",convert.factors=FALSE,convert.underscore=FALSE) # Long format with ADE data ade.data <- read.dta("t40_aids_events.dta",convert.factors=FALSE,convert.underscore=FALSE) # Geographic origin data for Vanderbilt subjects vu.geo.org <- read.csv('region_of_origin_12jan2012.csv',sep=',',header=TRUE,stringsAsFactors=FALSE) names(vu.geo.org) <- c('patient','origin_g.factor') vu.geo.org$unique_id <- paste(vu.geo.org$patient,'Vanderbilt',sep='.') vu.geo.org$origin_g <- ifelse(vu.geo.org$origin_g.factor == 'Europe',0, ifelse(vu.geo.org$origin_g.factor == 'Northern Africa and Middle East',11, ifelse(vu.geo.org$origin_g.factor == 'Sub-Saharan Africa',12, ifelse(vu.geo.org$origin_g.factor == 'Asia',20, ifelse(vu.geo.org$origin_g.factor == 'Australia, Oceania, and New Zealand',30, ifelse(vu.geo.org$origin_g.factor == 'North America',51, ifelse(vu.geo.org$origin_g.factor == 'Central and South America',52, ifelse(vu.geo.org$origin_g.factor == 'Unknown',99,NA)))))))) vu.geo.org <- vu.geo.org[order(vu.geo.org$unique_id),] # Event codes baseline$event1 <- ifelse(baseline$event1 == "", NA, baseline$event1) baseline$event2 <- ifelse(baseline$event2 == "", NA, baseline$event2) baseline$event3 <- ifelse(baseline$event3 == "", NA, baseline$event3) baseline$event4 <- ifelse(baseline$event4 == "", NA, baseline$event4) baseline$event5 <- ifelse(baseline$event5 == "", NA, baseline$event5) baseline$event6 <- ifelse(baseline$event6 == "", NA, baseline$event6) baseline$event7 <- ifelse(baseline$event7 == "", NA, baseline$event7) baseline$event8 <- ifelse(baseline$event8 == "", NA, baseline$event8) # For those who have missing event1 values but non-missing event_d1 values, need to check the aids variable # If aids = 1, then make event1 = "UNK"; otherwise, leave as an empty string baseline$event1 <- ifelse(is.na(baseline$event1) & !is.na(baseline$event_d1) & baseline$aids == 1,"UNK", baseline$event1) baseline$event_d1 <- ifelse(is.na(baseline$event1) & !is.na(baseline$event_d1) & baseline$aids == 0,NA, as.character(baseline$event_d1)) baseline$event_d1 <- as.Date(baseline$event_d1) event_codes <- read.csv("event_codes.csv",sep=",",header=TRUE,stringsAsFactors=FALSE) baseline$event1_name <- event_codes[match(baseline$event1,event_codes$event,nomatch=NA),"diseasedescription"] baseline$event2_name <- event_codes[match(baseline$event2,event_codes$event,nomatch=NA),"diseasedescription"] baseline$event3_name <- event_codes[match(baseline$event3,event_codes$event,nomatch=NA),"diseasedescription"] baseline$event4_name <- event_codes[match(baseline$event4,event_codes$event,nomatch=NA),"diseasedescription"] baseline$event5_name <- event_codes[match(baseline$event5,event_codes$event,nomatch=NA),"diseasedescription"] baseline$event6_name <- event_codes[match(baseline$event6,event_codes$event,nomatch=NA),"diseasedescription"] baseline$event7_name <- event_codes[match(baseline$event7,event_codes$event,nomatch=NA),"diseasedescription"] baseline$event8_name <- event_codes[match(baseline$event8,event_codes$event,nomatch=NA),"diseasedescription"] @ <>= #----------------------------------# # Formatting data # #----------------------------------# # Defining a unique subject identifier baseline$unique_id <- paste(baseline$patient,baseline$cohort,sep=".") hemoglobin$unique_id <- paste(hemoglobin$patient,hemoglobin$cohort,sep=".") hemoglobin <- subset(hemoglobin, unique_id %in% baseline$unique_id) origin$unique_id <- paste(origin$patient,origin$cohort,sep=".") cd4.data$unique_id <- paste(cd4.data$patient,cd4.data$cohort,sep=".") cd4.data <- subset(cd4.data, unique_id %in% baseline$unique_id) vl.data$unique_id <- paste(vl.data$patient,vl.data$cohort,sep=".") vl.data <- subset(vl.data, unique_id %in% baseline$unique_id) ade.data$unique_id <- paste(ade.data$patient,ade.data$cohort,sep=".") ade.data <- subset(ade.data, unique_id %in% baseline$unique_id) # Defining the first year of HAART baseline$end_of_first_year_haart <- baseline$haart_d + 365.25 # Adding 1 year to haart_d date # Defining whether a subject had an ADE in the first year of HAART baseline$ade_first_year_of_haart <- ifelse(baseline$event_d1 >= baseline$haart_d & baseline$event_d1 <= baseline$end_of_first_year_haart & !is.na(baseline$event_d1) & baseline$event_d1 <= baseline$clinfup_d & baseline$event_d1 <= baseline$closeDate,1,0) baseline$ade_first_year_of_haart.factor <- factor(baseline$ade_first_year_of_haart, levels=c(0,1), labels=c("No","Yes")) baseline$death_first_year_of_haart <- ifelse(baseline$death_d <= baseline$end_of_first_year_haart & !is.na(baseline$death_d),1,0) baseline$death_first_year_of_haart.factor <- factor(baseline$death_first_year_of_haart, levels=c(0,1), labels=c("No","Yes")) baseline$ade_or_death_first_year_of_haart <- ifelse(baseline$ade_first_year_of_haart == 1 | baseline$death_first_year_of_haart == 1, 1, 0) baseline$ade_or_death_first_year_of_haart.factor <- factor(baseline$ade_or_death_first_year_of_haart, levels=c(0,1), labels=c("No","Yes")) # Creating factor variables with labels for numerical categorical variables # Combine "Blood" with "Other or unknown" baseline$risk <- ifelse(baseline$risk %in% c(4,5) & !is.na(baseline$risk),5, ifelse(!is.na(baseline$risk),baseline$risk,NA)) baseline$risk.factor <- factor(baseline$risk, levels=c(3,2,1,5), labels=c("Heterosexual","IDU","MSM","Blood, Other or unknown")) baseline$region.factor <- factor(baseline$region, levels=c(0,1,2), labels=c("Europe","Canada","US")) # Sorting baseline by unique_id in order to add in Vanderbilt geographic origin data baseline <- baseline[order(baseline$unique_id),] which.row <- which(baseline$cohort == 'Vanderbilt') baseline$origin_g[which.row] <- vu.geo.org$origin_g baseline$origin_g.factor <- factor(baseline$origin_g, levels=c(0,11,12,20,30,51,52,99), labels=c("Europe","Northern Africa and Middle East", "Sub-Saharan Africa","Asia", "Australia, Oceania and New Zealand", "North America","Central and South America", "Unknown")) baseline$origin_g_combined <- ifelse(baseline$origin_g == 30, 51,baseline$origin_g) baseline$origin_g_combined.factor <- factor(baseline$origin_g_combined, levels=c(0,11,12,20,51,52,99), labels=c("Europe","Northern Africa and Middle East", "Sub-Saharan Africa","Asia","North America", "Central and South America", "Unknown")) baseline$gender.factor <- factor(baseline$gender, levels=c(1,2), labels=c("Male","Female")) baseline$stagec.factor <- factor(baseline$stagec, levels=c(0,1), labels=c("No","Yes")) baseline$cohort.factor <- factor(baseline$cohort) baseline$combined.factor <- factor(baseline$combined, levels=c(0,1,2,3,4,5,9), labels=c("White","Black","Hispanic","Asian","Arab","Indigenous","Unspecified")) # Defining ART-CC's version of geographic origin from origin_g baseline$artcc_origin_g <- ifelse(baseline$origin_g %in% c(0,30,51),"WEST", ifelse(baseline$origin_g == 11, "NAME", ifelse(baseline$origin_g == 12, "SSA", ifelse(baseline$origin_g == 20, "ASIA", ifelse(baseline$origin_g == 52, "LA", ifelse(baseline$origin_g == 99, "Unknown",NA)))))) baseline$artcc_origin_g.factor <- factor(baseline$artcc_origin_g, levels=c("WEST","NAME","SSA","ASIA","LA","Unknown")) baseline$native.born <- ifelse(baseline$artcc_origin_g.factor == "WEST" & !is.na(baseline$artcc_origin_g.factor),1, ifelse(!is.na(baseline$artcc_origin_g.factor) & baseline$artcc_origin_g.factor != "Unknown",0, ifelse(!is.na(baseline$artcc_origin_g.factor) & baseline$artcc_origin_g.factor == "Unknown",2,NA))) baseline$native.born.factor <- factor(baseline$native.born, levels=c(1,0,2), labels=c("Yes","No","Unknown")) # Defining whether an initial ART regimen was PI-based, NNRTI-based, or Other. Will create separate indicator # variables to define this. baseline$pi_based <- ifelse(baseline$totpi > 0,1,0) baseline$pi_based.factor <- factor(baseline$pi_based, levels=c(0,1), labels=c("No","Yes")) baseline$nnrti_based <- ifelse(baseline$totnnrti > 0,1,0) baseline$nnrti_based.factor <- factor(baseline$nnrti_based, levels=c(0,1), labels=c("No","Yes")) baseline$other_based <- ifelse(baseline$totpi == 0 & baseline$totnnrti == 0,1,0) baseline$other_based.factor <- factor(baseline$other_based, levels=c(0,1), labels=c("No","Yes")) rlt_based <- apply(subset(baseline, select=c(haart0_1,haart0_2,haart0_3,haart0_4,haart0_5,haart0_6,haart0_7, haart0_8,haart0_9)),1,FUN=function(y){ as.numeric(any(y == "RLT"))}) baseline$rlt_based <- rlt_based baseline$rlt_based.factor <- factor(baseline$rlt_based, levels=c(0,1),labels=c("No","Yes")) @ <>= # Defining L2FU baseline$l2fu <- ifelse(baseline$clinfup_d < baseline$end_of_first_year_haart & (baseline$clinfup_d+365.25) < baseline$closeDate & (((baseline$clinfup_d + 365.25) < baseline$death_d & !is.na(baseline$death_d)) | is.na(baseline$death_d)),1,0) baseline$l2fu.factor <- factor(baseline$l2fu, levels=c(0,1),labels=c("No","Yes")) # Defining L2FU date baseline$l2fu_d <- ifelse(baseline$l2fu == 1,as.character(baseline$clinfup_d),NA) baseline$l2fu_d <- as.Date(baseline$l2fu_d) baseline$l2fu <- ifelse(abs(baseline$l2fu_d - baseline$end_of_first_year_haart) < 1 & baseline$l2fu == 1,0,baseline$l2fu) baseline$l2fu.factor <- factor(baseline$l2fu, levels=c(0,1),labels=c("No","Yes")) baseline$l2fu_d <- ifelse(abs(baseline$l2fu_d - baseline$end_of_first_year_haart) < 1,NA,as.character(baseline$l2fu_d)) baseline$l2fu_d <- as.Date(baseline$l2fu_d) baseline$european.clinic <- ifelse(baseline$region.factor == "Europe" & !is.na(baseline$region.factor),1, ifelse(!is.na(baseline$region.factor),0,NA)) baseline$european.clinic.factor <- factor(baseline$european.clinic, levels=c(0,1), labels=c("North American site","European site")) @ <>= # Need to find out who has baseline CD4 values > 31 days after or > 93 days before baseline$time_to_baseline_cd4 <- with(baseline, haart_d - cd4_t0_d) # Note: There is an error in how cd4_t0 derived in the baseline data set so must reconstruct in the cd4.data data set cd4.data$haart_d <- baseline[match(cd4.data$unique_id, baseline$unique_id, nomatch=NA),"haart_d"] cd4.data$time_from_haart_to_cd4 <- with(cd4.data, cd4_d - haart_d) do <- FALSE if(do){ junk <- lapply(split(cd4.data,cd4.data$unique_id), FUN=function(y){ y <- subset(y, !is.na(y[,'cd4_v'])) y <- y[order(y[,"cd4_d"]),] if(!is.na(y[1,"haart_d"])){ before.haart <- ifelse(y[,"haart_d"] >= y[,"cd4_d"],1,0) if(any(before.haart==1)){ min.time.before <- as.numeric(min(abs(y[which(before.haart==1),"time_from_haart_to_cd4"]),na.rm=TRUE)) which.row.time.before <- which(abs(y[which(before.haart==1),"time_from_haart_to_cd4"]) == min.time.before)[1] } else { min.time.before <- which.row.time.before <- NA } if(any(before.haart == 0)){ min.time.after <- as.numeric(min(abs(y[which(before.haart==0),"time_from_haart_to_cd4"]),na.rm=TRUE)) which.row.time.after <- which(abs(y[which(before.haart==0),"time_from_haart_to_cd4"]) == min.time.after)[1] } else { min.time.after <- which.row.time.after <- NA } if(min.time.before <= 93 & !is.na(min.time.before)){ cd4_t0_corrected <- y[before.haart==1,"cd4_v"][which.row.time.before] cd4_t0_d_corrected <- y[before.haart==1,"cd4_d"][which.row.time.before] } else if(min.time.after <= 31 & !is.na(min.time.after)){ cd4_t0_corrected <- y[before.haart==0,"cd4_v"][which.row.time.after] cd4_t0_d_corrected <- y[before.haart==0,"cd4_d"][which.row.time.after] } else { cd4_t0_corrected <- cd4_t0_d_corrected <- NA } } else { cd4_t0_corrected <- cd4_t0_d_corrected <- NA } df <- data.frame("unique_id"=y[1,"unique_id"], "cd4_t0_corrected"=cd4_t0_corrected, "cd4_t0_d_corrected"=cd4_t0_d_corrected) return(df)}) junk.df <- do.call("rbind",junk) baseline$cd4_t0_d_corrected <- junk.df[match(baseline$unique_id, junk.df$unique_id, nomatch=NA),"cd4_t0_d_corrected"] baseline$cd4_t0_corrected <- junk.df[match(baseline$unique_id, junk.df$unique_id, nomatch=NA),"cd4_t0_corrected"] baseline$cd4_diff <- with(baseline, cd4_t0 - cd4_t0_corrected) save(baseline,file="updated_baseline.rda",compress=TRUE) } #-------------------------------------------------------------------------------# vl.data$haart_d <- baseline[match(vl.data$unique_id, baseline$unique_id, nomatch=NA),"haart_d"] vl.data$time_from_haart_to_rna <- with(vl.data, rna_d - haart_d) do <- FALSE if(do){ load("updated_baseline.rda") junk <- lapply(split(vl.data,vl.data$unique_id), FUN=function(y){ y <- subset(y, !is.na(rna_v)) y <- y[order(y[,"rna_d"],y[,"rna_v"]),] if(!is.na(y[1,"haart_d"])){ unique_id <- y[1,"unique_id"] before.haart <- ifelse(y[,"haart_d"] >= y[,"rna_d"] & abs(y[,"time_from_haart_to_rna"]) <= 93,1, ifelse(y[,"haart_d"] < y[,"rna_d"] & y[,"time_from_haart_to_rna"] <= 31,0,NA)) y <- y[!is.na(before.haart),] before.haart <- before.haart[!is.na(before.haart)] if(any(before.haart == 1)){ min.time.before <- as.numeric(min(abs(y[which(before.haart==1),"time_from_haart_to_rna"]),na.rm=TRUE)) which.row.time.before <- which(abs(as.numeric(y[which(before.haart==1),"time_from_haart_to_rna"])) == min.time.before) if(length(which.row.time.before) > 1){ rna_t0_corrected <- max(y[which.row.time.before,"rna_v"],na.rm=TRUE) if(rna_t0_corrected == -1){ rna_t0_corrected <- max(y[which.row.time.before,"rna_l"],na.rm=TRUE) which.max.value <- which(y[which.row.time.before,"rna_v"]== -1)[1] } else { which.max.value <- which(y[which.row.time.before,"rna_v"]==rna_t0_corrected) } rna_t0_d_corrected <- y[which.row.time.before,"rna_d"][which.max.value] } else { rna_t0_corrected <- y[which.row.time.before,"rna_v"] rna_t0_d_corrected <- y[which.row.time.before,"rna_d"] if(rna_t0_corrected == -1) { rna_t0_corrected <- y[which.row.time.before,"rna_l"] } } } else if(any(before.haart == 0)){ min.time.after <- as.numeric(min(abs(y[which(before.haart==0),"time_from_haart_to_rna"]),na.rm=TRUE)) which.row.time.after <- which(abs(y[which(before.haart==0),"time_from_haart_to_rna"]) == min.time.after) if(length(which.row.time.after) > 1){ rna_t0_corrected <- max(y[which.row.time.after,"rna_v"],na.rm=TRUE) if(rna_t0_corrected == -1){ rna_t0_corrected <- max(y[which.row.time.after,"rna_l"],na.rm=TRUE) which.max.value <- which(y[which.row.time.after,"rna_v"]== -1)[1] } else { which.max.value <- which(y[which.row.time.after,"rna_v"]==rna_t0_corrected) } rna_t0_d_corrected <- y[which.row.time.after,"rna_d"][which.max.value] } else { rna_t0_corrected <- y[which.row.time.after,"rna_v"] rna_t0_d_corrected <- y[which.row.time.after,"rna_d"] if(rna_t0_corrected == -1) { rna_t0_corrected <- y[which.row.time.after,"rna_l"] } } } else { rna_t0_corrected <- rna_t0_d_corrected <- NA } } if(rna_t0_corrected %in% c(Inf,-Inf)){ rna_t0_corrected <- NA } rna_t0_corrected <- ifelse(rna_t0_corrected < 500 & !is.na(rna_t0_corrected), 500, rna_t0_corrected) df <- data.frame("unique_id"=unique_id, "rna_t0_corrected"=rna_t0_corrected, "rna_t0_d_corrected"=rna_t0_d_corrected) return(df)}) junk.df <- do.call("rbind",junk) baseline$rna_t0_d_corrected <- junk.df[match(baseline$unique_id, junk.df$unique_id, nomatch=NA),"rna_t0_d_corrected"] baseline$rna_t0_corrected <- junk.df[match(baseline$unique_id, junk.df$unique_id, nomatch=NA),"rna_t0_corrected"] baseline$rna_t0_corrected <- ifelse(is.na(baseline$rna_t0_corrected) & baseline$rna_t0 == 1,500,baseline$rna_t0_corrected) baseline$rna_diff <- with(baseline, rna_t0 - rna_t0_corrected) save(baseline,file="updated_baseline.rda",compress=TRUE) } hemoglobin$haart_d <- baseline[match(hemoglobin$unique_id, baseline$unique_id, nomatch=NA),"haart_d"] hemoglobin$time_from_haart_to_hemoglobin <- with(hemoglobin, lab_d - haart_d) do <- FALSE if(do){ junk <- lapply(split(hemoglobin,hemoglobin$unique_id), FUN=function(y){ y <- y[order(y[,"lab_d"]),] if(!is.na(y[1,"haart_d"])){ before.haart <- ifelse(y[,"haart_d"] >= y[,"lab_d"],1,0) if(any(before.haart==1)){ min.time.before <- as.numeric(min(abs(y[which(before.haart==1),"time_from_haart_to_hemoglobin"]),na.rm=TRUE)) which.row.time.before <- which(abs(y[which(before.haart==1),"time_from_haart_to_hemoglobin"]) == min.time.before)[1] } else { min.time.before <- which.row.time.before <- NA } if(any(before.haart == 0)){ min.time.after <- as.numeric(min(abs(y[which(before.haart==0),"time_from_haart_to_hemoglobin"]),na.rm=TRUE)) which.row.time.after <- which(abs(y[which(before.haart==0),"time_from_haart_to_hemoglobin"]) == min.time.after)[1] } else { min.time.after <- which.row.time.after <- NA } #if(min.time.before <= 93 & !is.na(min.time.before)){ if(min.time.before <= 181 & !is.na(min.time.before)){ hemoglobin_t0_corrected <- y[before.haart==1,"lab_v"][which.row.time.before] hemoglobin_t0_d_corrected <- y[before.haart==1,"lab_d"][which.row.time.before] } else if(min.time.after <= 31 & !is.na(min.time.after)){ hemoglobin_t0_corrected <- y[before.haart==0,"lab_v"][which.row.time.after] hemoglobin_t0_d_corrected <- y[before.haart==0,"lab_d"][which.row.time.after] } else { hemoglobin_t0_corrected <- hemoglobin_t0_d_corrected <- NA } } else { hemoglobin_t0_corrected <- hemoglobin_t0_d_corrected <- NA } df <- data.frame("unique_id"=y[1,"unique_id"], "hemoglobin_t0_corrected"=hemoglobin_t0_corrected, "hemoglobin_t0_d_corrected"=hemoglobin_t0_d_corrected) return(df)}) junk.df <- do.call("rbind",junk) baseline$hemoglobin_t0_d_corrected <- junk.df[match(baseline$unique_id, junk.df$unique_id, nomatch=NA),"hemoglobin_t0_d_corrected"] baseline$hemoglobin_t0_corrected <- junk.df[match(baseline$unique_id, junk.df$unique_id, nomatch=NA),"hemoglobin_t0_corrected"] save(baseline,file="updated_baseline.rda",compress=TRUE) } load("updated_baseline.rda") # Calculating corrected log10 viral load from corrected baseline viral load baseline$lrna_t0_corrected <- log10(baseline$rna_t0_corrected) # Still need to subset out those who have no known country of origin or race baseline$diff. <- with(baseline, clinfup_d - haart_d) sub.baseline <- subset(baseline, (!is.na(origin_g) | !is.na(combined)) & haart_d < clinfup_d) sub.baseline <- upData(sub.baseline, drop="recruit_d") # Defining whether someone had an ADE in the first year of HAART that was not TUB # Could have multiple ADEs as first ADE so have to check ade1 <- apply(subset(sub.baseline, select=c(unique_id,event1,event_d1, ade_first_year_of_haart,end_of_first_year_haart)),1,FUN=function(y){ if(y["ade_first_year_of_haart"]==1 & y["event1"] != "TUB" & !is.na(y["event1"])){ ade1 <- 1 } else { ade1 <- 0 } }) sub.baseline <- cbind(sub.baseline, ade1) ade2 <- apply(subset(sub.baseline, select=c(unique_id,event2,event_d2,ade1,clinfup_d, ade_first_year_of_haart,end_of_first_year_haart)),1,FUN=function(y){ if(y["event_d2"] <= y["end_of_first_year_haart"] & y["event_d2"] <= y["clinfup_d"] & y["ade1"] != 1 & y["event2"] != "TUB" & !is.na(y["event2"])){ ade2 <- 1 } else { ade2 <- 0 } }) sub.baseline <- cbind(sub.baseline, ade2) ade3 <- apply(subset(sub.baseline, select=c(unique_id,event3,event_d3,ade1,ade2,clinfup_d, ade_first_year_of_haart,end_of_first_year_haart)),1,FUN=function(y){ if(y["event_d3"] <= y["end_of_first_year_haart"] & y["event_d3"] <= y["clinfup_d"] & y["ade1"] != 1 & y["ade2"] != 1 & y["event3"] != "TUB" & !is.na(y["event3"])){ ade3 <- 1 } else { ade3 <- 0 } }) sub.baseline <- cbind(sub.baseline, ade3) ade4 <- apply(subset(sub.baseline, select=c(unique_id,event4,event_d4,ade1,ade2,ade3,clinfup_d, ade_first_year_of_haart,end_of_first_year_haart)),1,FUN=function(y){ if(y["event_d4"] <= y["end_of_first_year_haart"] & y["event_d4"] <= y["clinfup_d"] & y["ade1"] != 1 & y["ade2"] != 1 & y["ade3"] != 1 & y["event4"] != "TUB" & !is.na(y["event4"])){ ade4 <- 1 } else { ade4 <- 0 } }) sub.baseline <- cbind(sub.baseline, ade4) # ADEs in first year of HAART, regardless if first or not junk <- subset(sub.baseline, select=c(unique_id, event_d1, event_d2, event_d3, event_d4, event_d5, event_d6, event_d7, event_d8, end_of_first_year_haart)) junk.long <- reshape(junk, varying=list(c("event_d1","event_d2","event_d3","event_d4","event_d5","event_d6","event_d7","event_d8")), timevar="ADE_number",times=c(1:8),v.names="Date",direction="long") junk.long <- junk.long[,-5] junk.long <- junk.long[order(junk.long$unique_id, junk.long$Date),] junk2 <- subset(sub.baseline, select=c(unique_id,event1,event2,event3,event4,event5,event6,event7,event8)) junk.long2 <- reshape(junk2, varying=list(c("event1","event2","event3","event4","event5","event6","event7","event8")), timevar="ADE_number",times=c(1:8),v.names="ADE",direction="long") junk.long2 <- junk.long2[order(junk.long2$unique_id, junk.long2$ADE_number),] junk.long2 <- junk.long2[,-4] junk3 <- unique(subset(sub.baseline, select=c(unique_id,clinfup_d))) stuff <- merge(junk.long,junk.long2, all=TRUE) stuff$clinfup_d <- junk3[match(stuff$unique_id,junk3$unique_id,nomatch=NA),"clinfup_d"] stuff2 <- subset(stuff, Date <= end_of_first_year_haart & Date <= clinfup_d & !is.na(Date)) stuff2 <- stuff2[order(stuff2$unique_id, stuff2$ADE_number),] which.duplicated <- (which(duplicated(subset(stuff2,select=c(unique_id,Date,ADE))))) stuff3 <- stuff2[-which.duplicated,] tmp <- unique(subset(sub.baseline, unique_id %in% stuff3$unique_id, select=c(unique_id,artcc_origin_g.factor,native.born.factor, combined.factor,closeDate,clinfup_d,dthfup_d,cenf_d,cennf_d, death_d,l2fu_d,end_of_first_year_haart,haart_d))) stuff3$artcc_origin_g.factor <- tmp[match(stuff3$unique_id,tmp$unique_id,nomatch=NA),"artcc_origin_g.factor"] stuff3$native.born.factor <- tmp[match(stuff3$unique_id,tmp$unique_id,nomatch=NA),"native.born.factor"] stuff3$combined.factor <- tmp[match(stuff3$unique_id,tmp$unique_id,nomatch=NA),"combined.factor"] stuff3$closeDate <- tmp[match(stuff3$unique_id,tmp$unique_id,nomatch=NA),"closeDate"] stuff3$clinfup_d <- tmp[match(stuff3$unique_id,tmp$unique_id,nomatch=NA),"clinfup_d"] stuff3$dthfup_d <- tmp[match(stuff3$unique_id,tmp$unique_id,nomatch=NA),"dthfup_d"] stuff3$cenf_d <- tmp[match(stuff3$unique_id,tmp$unique_id,nomatch=NA),"cenf_d"] stuff3$cennf_d <- tmp[match(stuff3$unique_id,tmp$unique_id,nomatch=NA),"cennf_d"] stuff3$death_d <- tmp[match(stuff3$unique_id,tmp$unique_id,nomatch=NA),"death_d"] stuff3$l2fu_d <- tmp[match(stuff3$unique_id,tmp$unique_id,nomatch=NA),"l2fu_d"] stuff3$end_of_first_year_haart <- tmp[match(stuff3$unique_id,tmp$unique_id,nomatch=NA),"end_of_first_year_haart"] stuff3$haart_d <- tmp[match(stuff3$unique_id,tmp$unique_id,nomatch=NA),"haart_d"] # Defining incidence_fup sub.baseline$incidence_end_of_fup <- apply(subset(sub.baseline, select=c(death_d,l2fu_d,end_of_first_year_haart,closeDate)),1,FUN=min,na.rm=TRUE) sub.baseline$incidence_end_of_fup <- as.Date(sub.baseline$incidence_end_of_fup) sub.baseline$incidence_fup <- as.numeric(with(sub.baseline, (incidence_end_of_fup - haart_d)/365.25)) tr.go <- tapply(sub.baseline$incidence_fup, INDEX=sub.baseline$artcc_origin_g.factor, FUN=sum, na.rm=TRUE) tr.go.df <- data.frame("artcc_origin_g.factor"=c(levels(stuff3$artcc_origin_g.factor)), "go.incidence_fup"=as.numeric(tr.go)) sub.baseline$go.incidence_fup <- tr.go.df[match(sub.baseline$artcc_origin_g.factor,tr.go.df$artcc_origin_g.factor,nomatch=NA), "go.incidence_fup"] tr.nb <- tapply(sub.baseline$incidence_fup, INDEX=sub.baseline$native.born.factor, FUN=sum, na.rm=TRUE) tr.nb.df <- data.frame("native.born.factor"=c(levels(sub.baseline$native.born.factor)), "nb.incidence_fup"=as.numeric(tr.nb)) tr.nb.df <- rbind(tr.nb.df,data.frame('native.born.factor'='Combined', 'nb.incidence_fup'=sum(sub.baseline$incidence_fup[which(!is.na(sub.baseline$native.born.factor))]))) sub.baseline$nb.incidence_fup <- tr.nb.df[match(sub.baseline$native.born.factor,tr.nb.df$native.born.factor,nomatch=NA), "nb.incidence_fup"] tr.race <- tapply(sub.baseline$incidence_fup, INDEX=sub.baseline$combined.factor, FUN=sum,na.rm=TRUE) tr.race.df <- data.frame("combined.factor"=c(levels(sub.baseline$combined.factor)), "race.incidence_fup"=as.numeric(tr.race)) sub.baseline$race.incidence_fup <- tr.race.df[match(sub.baseline$combined.factor,tr.race.df$combined.factor,nomatch=NA), "race.incidence_fup"] incidence_fup <- unique(subset(sub.baseline, unique_id %in% stuff3$unique_id, select=c(unique_id,go.incidence_fup,nb.incidence_fup, race.incidence_fup,incidence_end_of_fup, incidence_fup))) stuff3$incidence_fup <- incidence_fup[match(stuff3$unique_id,incidence_fup$unique_id,nomatch=NA),"incidence_fup"] stuff3$incidence_end_of_fup <- incidence_fup[match(stuff3$unique_id,incidence_fup$unique_id,nomatch=NA),"incidence_end_of_fup"] stuff3$go.incidence_fup <- incidence_fup[match(stuff3$unique_id,incidence_fup$unique_id,nomatch=NA),"go.incidence_fup"] stuff3$nb.incidence_fup <- incidence_fup[match(stuff3$unique_id,incidence_fup$unique_id,nomatch=NA),"nb.incidence_fup"] stuff3$race.incidence_fup <- incidence_fup[match(stuff3$unique_id,incidence_fup$unique_id,nomatch=NA),"race.incidence_fup"] stuff3$origin_ade <- paste(stuff3$artcc_origin_g.factor,stuff3$ADE,sep=".") stuff3$origin_ade.factor <- factor(stuff3$origin_ade, levels=sort(levels(interaction(stuff3$artcc_origin_g.factor,stuff3$ADE)))) stuff3$native.born.factor <- factor(stuff3$native.born, levels=c('Yes','No')) stuff3$native_ade <- paste(stuff3$native.born.factor, stuff3$ADE,sep=".") stuff3$native_ade.factor <- factor(stuff3$native_ade, levels=sort(levels(interaction(stuff3$native.born.factor,stuff3$ADE)))) stuff3$combined_ade <- paste(stuff3$combined.factor,stuff3$ADE,sep=".") stuff3$combined_ade.factor <- factor(stuff3$combined_ade, levels=sort(levels(interaction(stuff3$combined.factor,stuff3$ADE)))) # None past event3 # Now need to define end of follow-up based on when the person had the non-TUB ADE in the first year of HAART sub.baseline$non_tb_event_d <- ifelse(sub.baseline$ade1 == 1, as.character(sub.baseline$event_d1), ifelse(sub.baseline$ade2 == 1, as.character(sub.baseline$event_d2), ifelse(sub.baseline$ade3 == 1, as.character(sub.baseline$event_d3),NA))) sub.baseline$non_tb_event_d <- as.Date(sub.baseline$non_tb_event_d, format="%Y-%m-%d") sub.baseline$non_tb_first_year_of_haart <- ifelse(sub.baseline$ade1 == 1 | sub.baseline$ade2 == 1 | sub.baseline$ade3 == 1 | sub.baseline$ade4 == 1,1,0) sub.baseline$non_tb_first_year_of_haart.factor <- factor(sub.baseline$non_tb_first_year_of_haart,levels=c(0,1), labels=c("No","Yes")) sub.baseline$non_tb_or_death_first_year_of_haart <- ifelse(sub.baseline$non_tb_first_year_of_haart == 1 | sub.baseline$death_first_year_of_haart == 1,1,0) sub.baseline$non_tb_or_death_first_year_of_haart.factor <- factor(sub.baseline$non_tb_or_death_first_year_of_haart, levels=c(0,1), labels=c("No","Yes")) @ <>= # Table of ADEs by GO, NB, E/R sub.baseline$ade <- ifelse(!is.na(sub.baseline$event_d1) & sub.baseline$event_d1 <= sub.baseline$end_of_first_year_haart & sub.baseline$event_d1 >= sub.baseline$haart_d & sub.baseline$event_d1 <= sub.baseline$clinfup_d,1,0) sub.baseline$ade.factor <- factor(sub.baseline$ade, levels=c(0,1), labels=c("No","Yes")) summ.ade.go <- summary(artcc_origin_g.factor ~ ade.factor, data=sub.baseline, method="reverse", overall=TRUE) summ.ade.go$labels <- "ADE" latex(summ.ade.go, file="", caption="Distribution of ADEs across Geographic Origin.",exclude1=FALSE, long=TRUE, where="!h") # Listing specific ADEs # event in first year of haart (and not just the first ADE in the first year of HAART but all in the first year of HAART) stuff3$ADE_name <- event_codes[match(stuff3$ADE, event_codes$event, nomatch=NA),"diseasedescription"] stuff3$ADE_name_short <- event_codes[match(stuff3$ADE, event_codes$event, nomatch=NA),'diseasedescription.short'] cj <- lapply(split(subset(stuff3, select=c(unique_id, go.incidence_fup,artcc_origin_g.factor,ADE,origin_ade.factor)), stuff3$origin_ade.factor),FUN=function(y){ if(dim(y)[1] > 0){ count <- dim(y)[1] tot_person_yr <- y[1,"go.incidence_fup"] incidence_rate <- count/tot_person_yr*1000 } else { count <- tot_person_yr <- incidence_rate <- 0 } df <- data.frame("origin_ade"=y[1,"origin_ade.factor"], "count"=count, "incidence_rate"=incidence_rate, "tot_person_yr"=tot_person_yr) return(df)}) cj.df <- do.call("rbind",cj) which.rows <- which(is.na(cj.df$origin_ade)) cj.df$origin_ade[which.rows] <- rownames(cj.df)[which.rows] cj.df$artcc_origin_g <- sapply(strsplit(as.character(cj.df$origin_ade), "\\."), function(x){x[[1]]}) cj.df$artcc_origin_g.factor <- factor(cj.df$artcc_origin_g, levels=c("WEST","NAME","SSA","ASIA","LA","Unknown")) cj.df$ADE <- sapply(strsplit(as.character(cj.df$origin_ade), "\\."), function(x){x[[2]]}) cj.df$incidence_rate <- format(round(cj.df$incidence_rate,2),nsmall=2) cj.df <- subset(cj.df, artcc_origin_g.factor != 'Unknown') cj.df$ADE <- factor(cj.df$ADE) # Getting cumulative incidence for geographic origin col1 <- paste('Combined',sort(unique(stuff3$ADE)),sep='.') col2 <- as.numeric(tapply(cj.df$count[which(cj.df$artcc_origin_g.factor != 'Unknown')], INDEX=cj.df$ADE[which(cj.df$artcc_origin_g.factor != 'Unknown')], FUN=sum)) col2b <- as.numeric(tapply(cj.df$tot_person_yr[which(cj.df$artcc_origin_g.factor != 'Unknown')], INDEX=cj.df$ADE[which(cj.df$artcc_origin_g.factor != 'Unknown')], FUN=sum)) col3 <- format(round(col2/col2b*1000,2),nsmall=2) col4 <- col2b col5 <- rep('Combined',times=length(col1)) col6 <- col5 col7 <- sort(unique(stuff3$ADE)) cj.df <- rbind(cj.df, data.frame('origin_ade'=col1, 'count'=col2, 'incidence_rate'=col3, 'tot_person_yr'=col4, 'artcc_origin_g'=col5, 'artcc_origin_g.factor'=col6, 'ADE'=col7)) # Need to order in decreasing order tbl.order <- table(stuff3$ADE[which(stuff3$artcc_origin_g.factor != 'Unknown')]) tbl.df <- data.frame('ADE'=names(tbl.order), 'Total count'=as.numeric(tbl.order)) cj.df <- merge(cj.df, tbl.df, all=TRUE) cj.df <- cj.df[order(cj.df$Total.count,cj.df$ADE,cj.df$artcc_origin_g.factor,decreasing=TRUE),] cj.df <- subset(cj.df, !is.na(Total.count)) cj.df$ADE.short <- event_codes[match(cj.df$ADE,event_codes$event,nomatch=NA),'diseasedescription.short'] which.west <- which(cj.df$artcc_origin_g.factor == "WEST") which.name <- which(cj.df$artcc_origin_g.factor == "NAME") which.ssa <- which(cj.df$artcc_origin_g.factor == "SSA") which.asia <- which(cj.df$artcc_origin_g.factor == "ASIA") which.la <- which(cj.df$artcc_origin_g.factor == "LA") which.unknown <- which(cj.df$artcc_origin_g.factor == "Unknown") which.combined <- which(cj.df$artcc_origin_g.factor == 'Combined') inc.go.specific <- data.frame("ADE"=cj.df$ADE.short[seq(1,dim(cj.df)[1],by=6)], "West"=paste(cj.df[which.west,"incidence_rate"]," (", cj.df[which.west,"count"], ")",sep=""), "NAME"=paste(cj.df[which.name,"incidence_rate"]," (", cj.df[which.name,"count"], ")",sep=""), "SSA"=paste(cj.df[which.ssa,"incidence_rate"]," (", cj.df[which.ssa,"count"], ")",sep=""), "Asia"=paste(cj.df[which.asia,"incidence_rate"]," (", cj.df[which.asia,"count"], ")",sep=""), "LA"=paste(cj.df[which.la,"incidence_rate"]," (", cj.df[which.la,"count"], ")",sep=""), 'Combined'=paste(cj.df[which.combined,'incidence_rate'],' (', cj.df[which.combined,'count'], ')',sep='')) latex(inc.go.specific, file="", rowname=NULL,insert.bottom = "N (WEST)=36367; N (NAME) = 1111; N (SSA) = 7846; N (ASIA) = 793; N (LA) = 2737; N (Unknown) = 17038. Frequencies represent total number of events per geographic origin. Total follow-up time: WEST=33160.78 pr-yr; NAME=1043.56 pr-yr; SSA=7113.26 pr-yr; Asia=740.83 pr-yr; LA=2480.34 pr-yr; Unknown=16074.84 pr-yr.",caption="Incidence rate of ADEs in the first year of HAART across geographic origin listed as Incidence Rate (Frequency).",where="!h",col.just=c("l","r","r","r","r","r","r"),landscape=TRUE,size="small") summ.ade.nb <- summary(native.born.factor ~ ade.factor, data=sub.baseline, method="reverse",overall=TRUE) summ.ade.nb$labels <- "ADE" latex(summ.ade.nb, file="", caption="Distribution of ADEs across Native Born status.",exclude1=FALSE, long=TRUE, where="!h") # Listing specific ADEs # event in first year of haart (and not just the first ADE in the first year of HAART but all in the first year of HAART) cj <- lapply(split(subset(stuff3,select=c(unique_id, nb.incidence_fup,native.born.factor,ADE,native_ade.factor)), stuff3$native_ade.factor),FUN=function(y){ if(dim(y)[1] > 0){ count <- dim(y)[1] # tot_person_yr <- sum(y[,"incidence_fup"],na.rm=TRUE) tot_person_yr <- y[1,"nb.incidence_fup"] incidence_rate <- count/tot_person_yr*1000 } else { count <- tot_person_yr <- incidence_rate <- 0 } df <- data.frame("native_ade"=y[1,"native_ade.factor"], "count"=count, "incidence_rate"=incidence_rate, "tot_person_yr"=tot_person_yr) return(df)}) cj.df <- do.call("rbind",cj) which.rows <- which(is.na(cj.df$native_ade)) cj.df$native_ade[which.rows] <- rownames(cj.df)[which.rows] cj.df$native.born <- sapply(strsplit(as.character(cj.df$native_ade), "\\."), function(x){x[[1]]}) cj.df$ADE <- sapply(strsplit(as.character(cj.df$native_ade), "\\."), function(x){x[[2]]}) cj.df$ADE <- factor(cj.df$ADE) cj.df$incidence_rate <- format(round(cj.df$incidence_rate,2),nsmall=2) # Getting cumulative incidence for combined native born status col1 <- paste('Combined',sort(unique(stuff3$ADE)),sep='.') col2 <- as.numeric(tapply(cj.df$count, INDEX=cj.df$ADE, FUN=sum)) col2b <- as.numeric(tapply(cj.df$tot_person_yr, INDEX=cj.df$ADE, FUN=sum)) col3 <- format(round(col2/col2b*1000,2),nsmall=2) col4 <- col2b col5 <- rep('Combined',times=length(col1)) col6 <- sort(unique(stuff3$ADE)) cj.df <- rbind(cj.df, data.frame('native_ade'=col1, 'count'=col2, 'incidence_rate'=col3, 'tot_person_yr'=col4, 'native.born'=col5, 'ADE'=col6)) # Need to order in decreasing order tbl.order <- table(stuff3$ADE[which(!is.na(stuff3$native.born.factor))]) tbl.df <- data.frame('ADE'=names(tbl.order), 'Total count'=as.numeric(tbl.order)) cj.df <- merge(cj.df, tbl.df, all=TRUE) cj.df <- cj.df[order(cj.df$Total.count,cj.df$ADE,cj.df$native.born,decreasing=TRUE),] cj.df <- subset(cj.df, !is.na(Total.count)) cj.df$ADE.short <- event_codes[match(cj.df$ADE,event_codes$event,nomatch=NA),'diseasedescription.short'] which.yes <- which(cj.df$native.born == "Yes") which.no <- which(cj.df$native.born == "No") which.combined <- which(cj.df$native.born == 'Combined') inc.nb.specific <- data.frame("ADE"=cj.df$ADE.short[seq(1,dim(cj.df)[1],by=3)], "Yes"=paste(cj.df[which.yes,"incidence_rate"]," (", cj.df[which.yes,"count"], ")",sep=""), "No"=paste(cj.df[which.no,"incidence_rate"]," (", cj.df[which.no,"count"], ")",sep=""), 'Combined'=paste(cj.df[which.combined,"incidence_rate"]," (", cj.df[which.combined,"count"], ")",sep="")) latex(inc.nb.specific, file="", rowname=NULL,insert.bottom = "N (Yes)=36367; N (No) = 12487. Frequencies represent total number of events per native born status. Total follow-up: NB=33160.78 pr-yr; non-NB=11378 pr-yr.",caption="Incidence rate of ADEs in the first year of HAART across Native Born status listed as Incidence Rate (Frequency).",where="!h",col.just=c("l","r","r","r"),landscape=TRUE,size="small") summ.ade.er <- summary(combined.factor ~ ade.factor, data=sub.baseline, method="reverse",overall=TRUE) summ.ade.er$labels <- "ADE" latex(summ.ade.er, file="", caption="Distribution of ADEs across Ethnicity/Race.",exclude1=FALSE,long=TRUE, where="!h",size="small") # Listing specific ADEs # event in first year of haart (and not just the first ADE in the first year of HAART but all in the first year of HAART) cj <- lapply(split(subset(stuff3, select=c(unique_id, race.incidence_fup,combined.factor,ADE,combined_ade.factor)), stuff3$combined_ade.factor),FUN=function(y){ if(dim(y)[1] > 0){ count <- dim(y)[1] tot_person_yr <- y[1,"race.incidence_fup"] incidence_rate <- count/tot_person_yr*1000 } else { count <- tot_person_yr <- incidence_rate <- 0 } df <- data.frame("combined_ade"=y[1,"combined_ade.factor"], "count"=count, "incidence_rate"=incidence_rate, "tot_person_yr"=tot_person_yr) return(df)}) cj.df <- do.call("rbind",cj) which.rows <- which(is.na(cj.df$combined_ade)) cj.df$combined_ade[which.rows] <- rownames(cj.df)[which.rows] cj.df$combined.factor <- sapply(strsplit(as.character(cj.df$combined_ade), "\\."), function(x){x[[1]]}) cj.df$ADE <- sapply(strsplit(as.character(cj.df$combined_ade), "\\."), function(x){x[[2]]}) cj.df$ADE <- factor(cj.df$ADE) cj.df$incidence_rate <- format(round(cj.df$incidence_rate,2),nsmall=2) which.white <- which(cj.df$combined.factor == "White") which.black <- which(cj.df$combined.factor == "Black") which.hispanic <- which(cj.df$combined.factor == "Hispanic") which.asian <- which(cj.df$combined.factor == "Asian") which.arab <- which(cj.df$combined.factor == "Arab") which.indigenous <- which(cj.df$combined.factor == "Indigenous") which.unspecified <- which(cj.df$combined.factor == "Unspecified") inc.race.specific <- data.frame("ADE"=c("Bacterial pneumonia", "Candid. unspecified", "Coccidioidomycosis dissem or extrapulm", "CMV - other location", "Cryptococcosis, extrapulm.", "AIDS dementia complex", "Hodgkin", "Histoplasmosis, extrapulm.", "HSV ulcers or pneumonitis/esophagitis", "Cervical cancer", "Isosporiasis diarrhoea", "Kaposi's sarcoma", "Leishmaniasis, visceral", "Non-Hodgkin - primary brain", "MAC or Kansasii, extrapulm.", "Mycobacterial extrapulm, other type, specify", "Non-Hodgkin - unknonw/other histology", "Other", "PCP P jiroveci infection", "Progressive multifocal leucoencephalopathy", "CMV chorioretinitis", "Salmonella bacteriaemia (non-typhoid)", "Cryptosporidosis (duration > 1 month)", "Unidentified AIDS event", "Toxoplasmosis, brain", "Mycobacterial TB, location unspecified", "Unknown", "HIV wasting syndrome"), "White"=paste(cj.df[which.white,"incidence_rate"]," (", cj.df[which.white,"count"], ")",sep=""), "Black"=paste(cj.df[which.black,"incidence_rate"]," (", cj.df[which.black,"count"], ")",sep=""), "Hispanic"=paste(cj.df[which.hispanic,"incidence_rate"]," (", cj.df[which.hispanic,"count"], ")",sep=""), "Asian"=paste(cj.df[which.asian,"incidence_rate"]," (", cj.df[which.asian,"count"], ")",sep=""), "Arab"=paste(cj.df[which.arab,"incidence_rate"]," (", cj.df[which.arab,"count"], ")",sep=""), "Indigenous"=paste(cj.df[which.indigenous,"incidence_rate"]," (", cj.df[which.indigenous,"count"], ")",sep=""), "Unspecified"=paste(cj.df[which.unspecified,"incidence_rate"]," (", cj.df[which.unspecified,"count"], ")",sep="")) latex(inc.race.specific, file="", rowname=NULL,insert.bottom = "N (White)= 40509; N (Black) = 13200; N (Hispanic) = 3233; N (Asian) = 900; N (Arab) = 1048; N (Indigenous) = 312; N (Unspecified) = 6690. Frequencies represent total number of events per ethnicity/race. Total follow-up: White=37141.80 pr-yr; Black=12193.26 pr-yr; Hispanic=2946.63 pr-yr; Asian=845.13 pr-yr; Arab=986.02 pr-yr; Indigenous=273.78 pr-yr; Unspecified=6227.00 pr-yr.",caption="Incidence rate of ADEs in the first year of HAART across Race/ethnicity status listed as Incidence Rate (Frequency).",where="!h",col.just=c("l","r","r","r","r","r","r","r"),landscape=TRUE,size="small") @ %--------------------------------------------------% % Comparisons across Region, GO, and E/R % %--------------------------------------------------% <>= summ.region <- summary(region.factor ~ gender.factor + cd4_t0_corrected + age + pi_based.factor + nnrti_based.factor + other_based.factor + rlt_based.factor + risk.factor + lrna_t0_corrected + stagec.factor + hemoglobin_t0_corrected + haartyr + l2fu.factor, data=sub.baseline, method="reverse",overall=TRUE,test=TRUE,digits=2) summ.region$labels <- c("Sex","Baseline CD4","Baseline age","PI-based initial ART", "NNRTI-based initial ART","Other-based initial ART","RLT-based initial ART","Probable route of infection", "Baseline log viral load","ADE at ART initiation","Baseline hemoglobin", "Yr of HAART initiation","Lost-to-followup") latex(summ.region, file="", prtest="P",exclude1=FALSE,long=TRUE,digits=2,caption="Descriptive statistics for variables of interest across region of enrollment.",landscape=TRUE) @ <>= summ.go <- summary(artcc_origin_g.factor ~ gender.factor + region.factor + cd4_t0_corrected + age + pi_based.factor + nnrti_based.factor + other_based.factor + rlt_based.factor + lrna_t0_corrected + stagec.factor + hemoglobin_t0_corrected + haartyr + l2fu.factor + risk.factor, data=sub.baseline, method="reverse",overall=TRUE,test=TRUE,digits=2) summ.go$labels <- c("Sex","Region of enrollment","Baseline CD4","Age at ART initiation","PI-based initial ART", "NNRTI-based initial ART","Other-based initial ART","RLT-based initial ART","Baseline log viral load", "ADE at ART initiation", "Baseline hemoglobin","Yr of HAART initiation","Lost-to-followup", 'Probable route of infection') latex(summ.go, file="", prtest="P",exclude1=FALSE,long=TRUE,digits=2,caption="Descriptive statistics for variables of interest across geographic origin.",size="small",landscape=TRUE) # Redoing above omitting those with 'Unknown' geographic origin sub.baseline$artcc_origin_g.factor.full <- sub.baseline$artcc_origin_g.factor sub.baseline$artcc_origin_g.factor <- factor(sub.baseline$artcc_origin_g.factor, levels=c("WEST","NAME","SSA","ASIA","LA")) summ.go.reduced <- summary(artcc_origin_g.factor ~ gender.factor + region.factor + cd4_t0_corrected + age + pi_based.factor + nnrti_based.factor + other_based.factor + rlt_based.factor + lrna_t0_corrected + stagec.factor + hemoglobin_t0_corrected + haartyr + l2fu.factor + risk.factor, data=sub.baseline, method="reverse",overall=TRUE,test=TRUE,digits=2) summ.go.reduced$labels <- c("Sex","Region of enrollment","Baseline CD4","Age at ART initiation","PI-based initial ART", "NNRTI-based initial ART","Other-based initial ART","RLT-based initial ART","Baseline log viral load", "ADE at ART initiation", "Baseline hemoglobin","Yr of HAART initiation","Lost-to-followup", 'Probable route of infection') latex(summ.go.reduced, file="", prtest="P",exclude1=FALSE,long=TRUE,digits=2,caption="Descriptive statistics for variables of interest across geographic origin treating those with 'Unknown' geographic origin as missing.",size="small",landscape=TRUE) @ <>= summ.native <- summary(native.born.factor ~ gender.factor + cd4_t0_corrected + age + pi_based.factor + nnrti_based.factor + other_based.factor + rlt_based.factor + risk.factor + lrna_t0_corrected + stagec.factor + hemoglobin_t0_corrected + haartyr + l2fu.factor, data=sub.baseline, method="reverse",overall=TRUE,test=TRUE,digits=2) summ.native$labels <- c("Sex","Baseline CD4","Baseline age","PI-based initial ART", "NNRTI-based initial ART","Other-based initial ART","RLT-based initial ART","Probable route of infection", "Baseline log viral load","ADE at ART initiation","Baseline hemoglobin", "Yr of HAART initiation","Lost-to-followup") latex(summ.native, file="", prtest="P",exclude1=FALSE,long=TRUE,digits=2,caption="Descriptive statistics for variables of interest across native born status.",landscape=TRUE) # Redoing above table omitting those with 'Unknown' native born status: sub.baseline$native.born.factor.full <- sub.baseline$native.born.factor sub.baseline$native.born.factor <- factor(sub.baseline$native.born.factor, levels=c("Yes","No")) summ.native.reduced <- summary(native.born.factor ~ gender.factor + cd4_t0_corrected + age + pi_based.factor + nnrti_based.factor + other_based.factor + rlt_based.factor + risk.factor + lrna_t0_corrected + stagec.factor + hemoglobin_t0_corrected + haartyr + l2fu.factor, data=sub.baseline, method="reverse",overall=TRUE,test=TRUE,digits=2) summ.native.reduced$labels <- c("Sex","Baseline CD4","Baseline age","PI-based initial ART", "NNRTI-based initial ART","Other-based initial ART","RLT-based initial ART","Probable route of infection", "Baseline log viral load","ADE at ART initiation","Baseline hemoglobin", "Yr of HAART initiation","Lost-to-followup") latex(summ.native.reduced, file="", prtest="P",exclude1=FALSE,long=TRUE,digits=2,caption="Descriptive statistics for variables of interest across native born status treating those with 'Unknown' status as missing.",landscape=TRUE) @ <>= # Adding in baseline viral load information created by Bryan using vl-data-manip.R load('baselineVLdata.Rda') sub.baseline$rna_t0_corrected2 <- baseline.vl.data[match(sub.baseline$unique_id,baseline.vl.data$unique_id,nomatch=NA),'VL'] @ %-------------------------------------------------------------% % Unadjusted and Adjusted Analyses with ADE outcome % %-------------------------------------------------------------% %---------------------------------------------------------------------------------% % All comers analysis % %---------------------------------------------------------------------------------% <>= #-------------------------------------# # Follow-up time in first year # #-------------------------------------# sub.baseline$time_to_any_ade <- with(sub.baseline, event_d1 - haart_d) end_of_follow_up_time_first_year <- apply(subset(sub.baseline, select=c(l2fu_d,event_d1,clinfup_d,death_d,end_of_first_year_haart,closeDate)),1, FUN=min,na.rm=TRUE) sub.baseline$end_of_follow_up_time_first_year <- as.Date(end_of_follow_up_time_first_year) sub.baseline$followup_time_first_year <- with(sub.baseline, as.numeric(end_of_follow_up_time_first_year - haart_d)/365.25) end_of_follow_up <- apply(subset(sub.baseline, select=c(l2fu_d,event_d1,clinfup_d,death_d,closeDate)),1, FUN=min,na.rm=TRUE) sub.baseline$end_of_follow_up <- as.Date(end_of_follow_up) sub.baseline$followup_time <- with(sub.baseline, as.numeric(end_of_follow_up - haart_d)/365.25) sub.baseline$time_to_non_tb_ade <- with(sub.baseline, non_tb_event_d - haart_d) end_of_follow_up_time_non_tb_first_year <- apply(subset(sub.baseline, select=c(l2fu_d,non_tb_event_d,death_d,clinfup_d, end_of_first_year_haart,closeDate)),1,FUN=min,na.rm=TRUE) sub.baseline$end_of_follow_up_non_tb_first_year <- as.Date(end_of_follow_up_time_non_tb_first_year) sub.baseline$followup_time_non_tb_first_year <- with(sub.baseline, as.numeric(end_of_follow_up_non_tb_first_year - haart_d)/365.25) end_of_followup_time_l2fu_first_year <- apply(subset(sub.baseline, select=c(event_d1,death_d,l2fu_d,end_of_first_year_haart,closeDate)), 1,FUN=min,na.rm=TRUE) sub.baseline$end_of_followup_time_l2fu_first_year <- as.Date(end_of_followup_time_l2fu_first_year) sub.baseline$followup_time_l2fu_first_year <- with(sub.baseline, as.numeric((end_of_followup_time_l2fu_first_year - haart_d))/365.25) #------------------------------------------------------------------------------------------------------------# # Geographic Origin Analyses # #------------------------------------------------------------------------------------------------------------# sub.baseline <- upData(sub.baseline, drop=c("event_d8","event8","lost_d")) ddist <- datadist(sub.baseline) options(datadist='ddist') geo.baseline <- subset(sub.baseline, !is.na(artcc_origin_g.factor)) geo.baseline <- upData(geo.baseline, drop="event_d7") ddist <- datadist(geo.baseline) options(datadist='ddist') geo.baseline.europe <- subset(geo.baseline, european.clinic.factor == "European site") geo.baseline.europe <- upData(geo.baseline.europe, drop="rna_u") ddist.europe <- datadist(geo.baseline.europe) geo.baseline.north_american <- subset(geo.baseline, european.clinic.factor == "North American site") geo.baseline.north_american <- upData(geo.baseline.north_american, drop=c("event_d5","event_d6","event6","event7","haart0_d", "event6_name","event7_name","event8_name")) ddist.north_american <- datadist(geo.baseline.north_american) geographic.origin.analysis <- model.fun(data=geo.baseline,cov="go",type="all") geographic.origin.nontb.analysis <- model.fun(data=geo.baseline,cov="go",type="non_tb") @ \begin{figure} \caption{Probability of any ADE by baseline CD4.} <>= mod.ade <- geographic.origin.analysis[[which(names(geographic.origin.analysis)=='any.ade.adj')]] par(mfrow=c(2,2),mar=c(1,1,1,1)) print(plot(Predict(mod.ade, cd4_t0_corrected,fun=plogis),xlab='Baseline CD4',ylab='Probability of any ADE in first year', adj.subtitle=FALSE)) @ \end{figure} \clearpage \begin{figure} \caption{Hazard of any ADE by baseline age.} <>= print(plot(Predict(mod.ade, age,fun=exp),xlab='Baseline age',ylab='Hazard of any ADE in first year', adj.subtitle=FALSE)) @ \end{figure} \clearpage \begin{figure} \caption{Hazard of any ADE by year of HAART start.} <>= print(plot(Predict(mod.ade, haartyr,fun=exp),xlab='Year of HAART start',ylab='Hazard of any ADE in first year', adj.subtitle=FALSE)) @ \end{figure} <>= #----------------------------------------# # Printing data # #----------------------------------------# # Unadjusted df.tot.unadj <- cbind(geographic.origin.analysis[[1]],geographic.origin.analysis[[2]][,-1]) latex(df.tot.unadj, file="", rowname=NULL, cgroup=c("","Any ADE","ADE or Death"),n.cgroup=c(1,3,3), col.just=c("l","r","r","r||","r","r","r"),landscape=TRUE,size="small",caption="Unadjusted survival models investigating the association of time to any ADE or ADE/Death in the first year after HAART start and geographic origin. Results are stratified by cohort.",where="!h", insert.bottom="Overall p-value for geographic origin: Any ADE: p=0.003; ADE/Death: p = 0.12.") # Adjusted df.tot <- cbind(geographic.origin.analysis[[3]],geographic.origin.analysis[[24]],geographic.origin.analysis[[4]]) latex(df.tot, file="", rowname=NULL, cgroup=c("","Any ADE",'Death',"ADE or Death"),n.cgroup=c(1,3,3,3), col.just=c("l","r","r","r||","r","r","r||","r","r","r"),landscape=TRUE, size="small",caption="Adjusted survival models investigating the association of time to any ADE, Death, or ADE/Death in the first year after HAART start and geographic origin adjusting for a priori selected confounders (reference group for geographic origin is 'WEST' and for risk factors is 'Heterosexual'). Results are stratified by cohort.",where="!h",insert.bottom="Total non-linear p-value: any ADE: p < 0.001; Death: p = 0.004; ADE/Death: p < 0.001.") # L2FU - unadjusted df.tot.l2fu.unadj <- geographic.origin.analysis[[11]] latex(df.tot.l2fu.unadj, file="", rowname=NULL, landscape=TRUE,size="small",caption="Unadjusted survival model investigating the association of time to L2FU in the first year after HAART start and geographic origin. Results are stratified by cohort.",where="!h",insert.bottom="Overall p-value for geographic origin: p < 0.001.") # L2FU - adjusted df.tot.l2fu <- geographic.origin.analysis[[12]] latex(df.tot.l2fu, file="", rowname=NULL, landscape=TRUE,size="small",caption="Adjusted survival model investigating the association of time to L2FU in the first year after HAART start and geographic origin adjusting for a priori selected confounders (reference group for geographic origin is 'WEST' and for risk factors is 'Heterosexual'). Results are stratified by cohort.",where="!h",insert.bottom="Total non-linear p-value: p < 0.001.") @ <>= #----------------------------------------# # Printing data # #----------------------------------------# # Unadjusted df.tot.nontb.unadj <- cbind(geographic.origin.nontb.analysis[[1]],geographic.origin.nontb.analysis[[2]][,-1]) latex(df.tot.nontb.unadj, file="", rowname=NULL, cgroup=c("","Any non-TB ADE","non-TB ADE or Death"),n.cgroup=c(1,3,3), col.just=c("l","r","r","r||","r","r","r"),landscape=TRUE,size="small",caption="Unadjusted survival models investigating the association of time to any non-TB ADE or non-TB ADE/Death in the first year after HAART start and geographic origin. Results are stratified by cohort.",where="!h", insert.bottom="Overall p-value for geographic origin: Any ADE: p=0.20; ADE/Death: p = 0.04.") # Adjusted df.nontb.tot <- cbind(geographic.origin.nontb.analysis[[3]],geographic.origin.nontb.analysis[[24]],geographic.origin.nontb.analysis[[4]]) latex(df.nontb.tot, file="", rowname=NULL, cgroup=c("","Any non-TB ADE","Death","non-TB ADE or Death"),n.cgroup=c(1,3,3,3), col.just=c("l","r","r","r||","r","r","r||","r","r","r"),landscape=TRUE,size="small",caption="Adjusted survival models investigating the association of time to any non-TB ADE, Death, or non-TB ADE/Death in the first year after HAART start and geographic origin adjusting for a priori selected confounders (reference group for geographic origin is 'WEST' and for risk factors is 'Heterosexual'). Results are stratified by cohort.",where="!h",insert.bottom="Total non-linear p-value: any ADE: p < 0.001; Death: p = 0.004; ADE/Death: p < 0.001.") @ <>= #------------------------------------------------------------------------------------------------------------# # Native Born Analyses # #------------------------------------------------------------------------------------------------------------# nb.baseline <- subset(sub.baseline, !is.na(native.born.factor)) nb.baseline <- upData(nb.baseline, drop=c("recruit_d","event_d7")) nb.ddist <- datadist(nb.baseline) options(datadist='nb.ddist') nb.baseline.europe <- subset(nb.baseline, european.clinic.factor == "European site") nb.baseline.europe <- upData(nb.baseline.europe, drop="rna_u") nb.ddist.europe <- datadist(nb.baseline.europe) nb.baseline.north_american <- subset(nb.baseline, european.clinic.factor == "European site") nb.baseline.north_american <- upData(nb.baseline.north_american, drop=c("event_d5","event_d6","event6","event7","haart0_d", "event6_name","event7_name","event8_name", "rna_u")) nb.ddist.north_american <- datadist(nb.baseline.north_american) native.born.analysis <- model.fun(data=nb.baseline, cov="nb",type="all") native.born.nontb.analysis <- model.fun(data=nb.baseline, cov="nb",type="non_tb") @ <>= # Testing for interaction (ie., H0: beta_europe = beta_north_american) europe.any.ade.beta <- native.born.analysis[[16]]$coeff[1] europe.any.ade.var <- diag(native.born.analysis[[16]]$var)[1] europe.ade.or.dth.beta <- native.born.analysis[[17]]$coeff[1] europe.ade.or.dth.var <- diag(native.born.analysis[[17]]$var)[1] north_american.any.ade.beta <- native.born.analysis[[18]]$coeff[1] north_american.any.ade.var <- diag(native.born.analysis[[18]]$var)[1] north_american.ade.or.dth.beta <- native.born.analysis[[19]]$coeff[1] north_american.ade.or.dth.var <- diag(native.born.analysis[[19]]$var)[1] any.ade.test.statistic <- (north_american.any.ade.beta - europe.any.ade.beta - 0)/ (sqrt(north_american.any.ade.var + europe.any.ade.var)) any.ade.p <- 2*(1-pnorm(abs(any.ade.test.statistic))) ade.or.dth.test.statistic <- (north_american.ade.or.dth.beta - europe.ade.or.dth.beta - 0)/ (sqrt(north_american.ade.or.dth.var + europe.ade.or.dth.var)) ade.or.dth.p <- 2*(1-pnorm(abs(ade.or.dth.test.statistic))) @ <>= #----------------------------------------# # Printing data # #----------------------------------------# # Unadjusted df.tot.nb.unadj <- c(native.born.analysis[[1]],native.born.analysis[[2]][-1],native.born.analysis[[27]][-1]) df.tot.nb.unadj <- t(as.matrix(df.tot.nb.unadj)) colnames(df.tot.nb.unadj) <- c("Covariate","HR","95\\% CI","P","HR","95\\% CI","P",'HR','95\\% CI','P') latex(df.tot.nb.unadj, file="", rowname=NULL, cgroup=c("","Any ADE","ADE or Death",'Death'),n.cgroup=c(1,3,3,3), col.just=c("l","r","r","r||","r","r","r||",'r','r','r'),landscape=TRUE,size="small",caption="Unadjusted survival models investigating the association of time to any ADE, ADE/Death, or Death in the first year after HAART start and native born status (reference group for native born analysis is 'Native born'). Results are stratified by cohort.",where="!h",insert.bottom="Overall p-value for native born status: Any ADE: p < 0.001; ADE/Death: p = 0.27; Death: p < 0.001." ) # Adjusted df.tot.nb <- cbind(native.born.analysis[[3]],native.born.analysis[[24]],native.born.analysis[[4]]) latex(df.tot.nb, file="", rowname=NULL, cgroup=c("","Any ADE","Death","ADE or Death"),n.cgroup=c(1,3,3,3), col.just=c("l","r","r","r||","r","r","r||","r","r","r"),landscape=TRUE,size="small",caption="Adjusted survival models investigating the association of time to any ADE, Death, or ADE/Death in the first year after HAART start and native born status adjusting for a priori selected confounders (reference group for native born status is 'Native born' and for risk factors is 'Heterosexual'). Results are stratified by cohort.",where="!h",insert.bottom="Total non-linear p-value: any ADE: p < 0.001; Death: p = 0.004; ADE/Death: p < 0.001. Restricting the analysis to only heterosexuals gives HR, CI, p for native-born status 1) any ADE: 1.23 (1.08, 1.40), 0.002; 2) ADE/Death: 1.14, (1.01, 1.28), p=0.03; 3) Death: 0.87, (0.69, 1.11), p=0.26.") # L2FU - unadjusted df.tot.nb.l2fu.unadj <- native.born.analysis[[11]] latex(df.tot.nb.l2fu.unadj, file="", rowname=NULL, landscape=TRUE,size="small",caption="Unadjusted survival model investigating the association of time to L2FU in the first year after HAART start and native born status. Results are stratified by cohort.", where="!h", insert.bottom="Overall p-value for native born status: p < 0.001.") # L2FU - adjusted df.tot.nb.l2fu <- native.born.analysis[[12]] latex(df.tot.nb.l2fu, file="", rowname=NULL, landscape=TRUE,size="small",caption="Adjusted survival model investigating the association of time to L2FU in the first year after HAART start and native born status adjusting for a priori selected confounders (reference group for native born status is 'Native born' and for risk factors is 'Heterosexual'). Results are stratified by cohort.",where="!h",insert.bottom="Total non-linear p-value: p < 0.001.") @ <>= #----------------------------------------# # Printing data # #----------------------------------------# # Unadjusted df.tot.nb.nontb.unadj <- c(native.born.nontb.analysis[[1]],native.born.nontb.analysis[[2]][-1]) df.tot.nb.nontb.unadj <- t(as.matrix(df.tot.nb.nontb.unadj)) colnames(df.tot.nb.nontb.unadj) <- c("Covariate","HR","95\\% CI","P","HR","95\\% CI","P") latex(df.tot.nb.nontb.unadj, file="", rowname=NULL, cgroup=c("","Any non-TB ADE","non-TB ADE or Death"), n.cgroup=c(1,3,3), col.just=c("l","r","r","r||","r","r","r"),landscape=TRUE,size="small",caption="Unadjusted survival models investigating the association of time to any non-TB ADE or non-TB ADE/Death in the first year after HAART start and native born status (reference group for native born analysis is 'Native born'). Results are stratified by cohort.",where="!h",insert.bottom="Overall p-value for native born status: Any ADE: p = 0.41; ADE/Death: p = 0.16." ) # Adjusted df.tot.nb.nontb <- cbind(native.born.nontb.analysis[[3]],native.born.nontb.analysis[[4]]) latex(df.tot.nb.nontb, file="", rowname=NULL, cgroup=c("","Any non-TB ADE","non-TB ADE or Death"),n.cgroup=c(1,3,3), col.just=c("l","r","r","r||","r","r","r"),landscape=TRUE,size="small",caption="Adjusted survival models investigating the association of time to any non-TB ADE or non-TB ADE/Death in the first year after HAART start and native born status adjusting for a priori selected confounders (reference group for native born status is 'Native born' and for risk factors is 'Heterosexual'). Results are stratified by cohort.",where="!h",insert.bottom="Total non-linear p-value: any ADE: p < 0.001; ADE/Death: p < 0.001.") @ \begin{figure}[!h] \begin{center} <>= par(mfrow=c(2,2),mar=c(4,4,1,1),oma=c(1,1,1,1),cex.lab=0.80) # Need to create 3-level censoring variable: 0 = no event; 1 = ADE; 2 = Death w/o ADE nb.baseline$new_censor <- ifelse(nb.baseline$ade_first_year_of_haart == 1,1, ifelse(nb.baseline$ade_first_year_of_haart == 0 & nb.baseline$death_first_year_of_haart == 1,2, ifelse(!is.na(nb.baseline$ade_first_year_of_haart),0,NA))) nb.baseline$new_l2fu_censor <- ifelse(nb.baseline$l2fu == 1,1, ifelse(nb.baseline$l2fu == 0 & nb.baseline$death_first_year_of_haart == 1,2, ifelse(!is.na(nb.baseline$l2fu),0,NA))) nb.baseline$native.born.factor <- factor(nb.baseline$native.born, levels=c(0,1),labels=c('Migrant','Non-migrant')) nb.baseline$followup_time_first_year_months <- nb.baseline$followup_time_first_year*12 nb.baseline$followup_time_l2fu_first_year_months <- nb.baseline$followup_time_l2fu_first_year*12 f <- cuminc(ftime=nb.baseline$followup_time_first_year_months, fstatus=nb.baseline$ade_first_year_of_haart, group=nb.baseline$native.born.factor, cencode="0",rho=0) fb <- cuminc(ftime=nb.baseline$followup_time_first_year_months, fstatus=nb.baseline$new_censor, group=nb.baseline$native.born.factor, cencode="0",rho=0) plot(f, ylim=c(0,0.10), xlab="Months from ART initiation", ylab="Cumulative incidence of ADE",cex=0.8,lty=c(2,1), curvlab=c('Migrant','Non-migrant'),axes=FALSE,lwd=2) axis(1,at=c(0,3,6,9,12),labels=c(0,3,6,9,12)) axis(2) box(bty='l') plot(fb, ylim=c(0,0.10), xlab="Months from ART initiation", ylab="Cumulative incidence of ADE",cex=0.75,lty=c(2,1,0,0), curvlab=c('Migrant','Non-migrant','',''),axes=FALSE,lwd=2) axis(1,at=c(0,3,6,9,12),labels=c(0,3,6,9,12)) axis(2) box(bty='l') text(9.5,0.238,paste("ADE: ", "p=",round(summary(unadj.mod.ade.adedth)$coef[1,5],2),sep=""), cex=0.85) geo.baseline$followup_time_first_year_months <- geo.baseline$followup_time_first_year*12 geo.baseline$followup_time_l2fu_first_year_months <- geo.baseline$followup_time_l2fu_first_year*12 geo.baseline$new_censor <- ifelse(geo.baseline$ade_first_year_of_haart == 1,1, ifelse(geo.baseline$ade_first_year_of_haart == 0 & geo.baseline$death_first_year_of_haart == 1,2, ifelse(!is.na(geo.baseline$ade_first_year_of_haart),0,NA))) geo.baseline$new_l2fu_censor <- ifelse(geo.baseline$l2fu == 1,1, ifelse(geo.baseline$l2fu == 0 & geo.baseline$death_first_year_of_haart == 1,2, ifelse(!is.na(geo.baseline$l2fu),0,NA))) geo.baseline$new_l2fu_censor.factor <- factor(geo.baseline$new_l2fu_censor, levels=c(0,1,2), labels=c('Censored','L2FU','Died')) geo.baseline$artcc_origin_g_b.factor <- factor(as.character(geo.baseline$artcc_origin_g.factor), levels=c('WEST','SSA','LA','NAME','ASIA')) f2 <- cuminc(ftime=geo.baseline$followup_time_first_year_months, fstatus=geo.baseline$new_censor, group=geo.baseline$artcc_origin_g_b.factor, cencode='0',rho=0) plot(f2, ylim=c(0,0.10), xlab='Months from ART initiation', ylab='Cumulative incidence of ADE',cex=0.75,lty=c(1,2,5,3,4,0,0,0,0,0),col=c(1,2,5,3,4,0,0,0,0,0), # curvlab=c('Non-migrant','NAME','SSA','Asia','LA','','','','',''),axes=FALSE) curvlab=c('Non-migrant','Sub-Saharan Africa','Latin America','North Africa/Middle East','Asia', '','','','',''),axes=FALSE,lwd=c(rep(2,times=3),3.0,2)) axis(1,at=c(0,3,6,9,12),labels=c(0,3,6,9,12)) axis(2) box(bty='l') \begin{figure}[height=2in,width=2in] \caption{Kaplan Meier curve for probability of survival across migrant status/geographic origin.} <>= #------------------------------------# # Time-to-event analysis # #------------------------------------# # Kaplan-Meier curve Srv.ade.new <- Surv(nb.baseline$followup_time_first_year,nb.baseline$ade_first_year_of_haart) Srv.ade.new.months <- Surv(nb.baseline$followup_time_first_year_months,nb.baseline$ade_first_year_of_haart) dd <- datadist(nb.baseline) options(datadist='dd') par(mfrow=c(2,2),mar=c(4,4,3,1)) plot(survfit(Srv.ade.new.months ~ nb.baseline$native.born.factor), mark.time=FALSE, lty=c(2,1), frame.plot=FALSE,ylim=c(0,0.10),fun=function(x)(1-x), xlab="Months from ART initiation",ylab="Probability of ADE",axes=FALSE,lwd=2) axis(1,at=c(0,3,6,9,12),labels=c(0,3,6,9,12)) axis(2) box(bty='l') legend("topleft", legend=c("Migrant", "Non-migrant"), lty=c(2,1),lwd=2, cex=0.90,bty="n") logrank.test <- logrank(Srv.ade.new, nb.baseline$native.born.factor) logrank.p <- 1-pchisq(logrank.test[1],df=1) logrank.p <- ifelse(logrank.p < 0.001,"< 0.001", ifelse(logrank.p > 0.001 & logrank.p < 0.01,round(logrank.p,3),round(logrank.p,2))) Srv.ade.new2 <- Surv(geo.baseline$followup_time_first_year,geo.baseline$ade_first_year_of_haart) Srv.ade.new2.months <- Surv(geo.baseline$followup_time_first_year_months,geo.baseline$ade_first_year_of_haart) dd <- datadist(geo.baseline) options(datadist='dd') plot(survfit(Srv.ade.new2.months ~ geo.baseline$artcc_origin_g_b.factor), mark.time=FALSE, lty=c(1,1,2,2,3), col=c(1,'grey70',1,'grey70',1), lwd=c(rep(2,times=3),2.0,2), frame.plot=FALSE,ylim=c(0.0,0.10),fun=function(x)(1-x), xlab="Months from ART initiation",ylab="Probability of ADE",axes=FALSE) axis(1,at=c(0,3,6,9,12),labels=c(0,3,6,9,12)) axis(2) box(bty='l') legend("topleft", legend=c('Non-migrant','Sub-Saharan Africa','Latin America','North Africa/Middle East','Asia'), col=c(1,'grey70',1,'grey70',1), lwd=c(rep(2,times=3),2.0,2), lty=c(1,1,2,2,3), cex=0.90,bty="n") logrank.test <- logrank(Srv.ade.new2, geo.baseline$artcc_origin_g.factor) logrank.p <- 1-pchisq(logrank.test[1],df=1) logrank.p <- ifelse(logrank.p < 0.001,"< 0.001", ifelse(logrank.p > 0.001 & logrank.p < 0.01,round(logrank.p,3),round(logrank.p,2))) @ \end{figure} \begin{figure}[height=2in,width=2in] \caption{Kaplan Meier curve for probability of survival across migrant status/geographic origin.} <>= #------------------------------------# # Time-to-event analysis # #------------------------------------# # Kaplan-Meier curve Srv.dth <- Surv(nb.baseline$followup_time_first_year,nb.baseline$death_first_year_of_haart) Srv.dth.months <- Surv(nb.baseline$followup_time_first_year_months,nb.baseline$death_first_year_of_haart) dd <- datadist(nb.baseline) options(datadist='dd') plot(survfit(Srv.dth.months ~ nb.baseline$native.born.factor), mark.time=FALSE, lty=c(2,1), frame.plot=FALSE,ylim=c(0,0.05),fun=function(x)(1-x), xlab="Months from ART initiation",ylab="Probability of death",axes=FALSE,lwd=2) axis(1,at=c(0,3,6,9,12),labels=c(0,3,6,9,12)) axis(2) box(bty='l') legend("topleft", legend=c("Migrant", "Non-migrant"), lty=c(2,1),lwd=2, cex=0.90,bty="n") logrank.test <- logrank(Srv.dth, nb.baseline$native.born.factor) logrank.p <- 1-pchisq(logrank.test[1],df=1) logrank.p <- ifelse(logrank.p < 0.001,"< 0.001", ifelse(logrank.p > 0.001 & logrank.p < 0.01,round(logrank.p,3),round(logrank.p,2))) Srv.dth2 <- Surv(geo.baseline$followup_time_first_year,geo.baseline$death_first_year_of_haart) Srv.dth2.months <- Surv(geo.baseline$followup_time_first_year_months,geo.baseline$death_first_year_of_haart) dd <- datadist(geo.baseline) options(datadist='dd') plot(survfit(Srv.dth2.months ~ geo.baseline$artcc_origin_g_b.factor), mark.time=FALSE, lty=c(1,1,2,2,3), col=c(1,'grey70',1,'grey70',1), lwd=c(rep(2,times=3),2.0,2), frame.plot=FALSE,ylim=c(0.0,0.05),fun=function(x)(1-x), xlab="Months from ART initiation",ylab="Probability of death",axes=FALSE) axis(1,at=c(0,3,6,9,12),labels=c(0,3,6,9,12)) axis(2) box(bty='l') legend("topleft", legend=c('Non-migrant','Sub-Saharan Africa','Latin America','North Africa/Middle East','Asia'), col=c(1,'grey70',1,'grey70',1), lwd=c(rep(2,times=3),2.0,2), lty=c(1,1,2,2,3), cex=0.90,bty="n") logrank.test <- logrank(Srv.dth2, geo.baseline$artcc_origin_g.factor) logrank.p <- 1-pchisq(logrank.test[1],df=1) logrank.p <- ifelse(logrank.p < 0.001,"< 0.001", ifelse(logrank.p > 0.001 & logrank.p < 0.01,round(logrank.p,3),round(logrank.p,2))) text(x=0.30,y=0.05,paste("Logrank p-value = ",logrank.p,sep=""),cex=0.65) @ \end{center} \end{figure} \begin{figure} \begin{center} <>= f.l2fu <- cuminc(ftime=nb.baseline$followup_time_l2fu_first_year_months, fstatus=nb.baseline$new_l2fu_censor, group=nb.baseline$native.born.factor, cencode='0',rho=0) f2.l2fu <- cuminc(ftime=geo.baseline$followup_time_l2fu_first_year_months, fstatus=geo.baseline$new_l2fu_censor, group=geo.baseline$artcc_origin_g_b.factor, cencode='0',rho=0) par(mfrow=c(2,2),mar=c(4,4,1,1)) plot(f.l2fu, ylim=c(0,0.20), xlab="Months from cART initiation", ylab="Cumulative incidence of LTFU",cex=0.8,lty=c(2,1,0,0), curvlab=c('Migrant','Non-migrant','',''),axes=FALSE) axis(1,at=c(0,3,6,9,12),labels=c(0,3,6,9,12)) axis(2) box(bty='l') plot(f2.l2fu, ylim=c(0,0.20), xlab="Months from cART initiation", ylab="Cumulative incidence of LTFU",cex=0.8,lty=c(1,2,5,3,4,0,0,0,0,0),col=c(1,2,5,3,4,0,0,0,0,0), curvlab=c('Non-migrant','Sub-Saharan Africa','Latin America','North Africa/Middle East','Asia'),axes=FALSE) axis(1,at=c(0,3,6,9,12),labels=c(0,3,6,9,12)) axis(2) box(bty='l') covb <- col(matrix(0,nrow=dim(geo.baseline)[1],ncol=length(levels(geo.baseline$artcc_origin_g.factor)))) == as.integer(geo.baseline$artcc_origin_g.factor) colnames(covb) <- c('Non-migrant','NAME','SSA','ASIA','LA') covb <- covb[,-1] l2fu.mod.geo <- crr(ftime=geo.baseline$followup_time_l2fu_first_year, fstatus=geo.baseline$new_l2fu_censor, cov1=covb, failcode=1) mod.geo <- crr(ftime=geo.baseline$followup_time_first_year, fstatus=geo.baseline$new_censor, cov1=covb, failcode=1) mod.geo.dth <- crr(ftime=geo.baseline$followup_time_first_year, fstatus=geo.baseline$death_first_year_of_haart, cov1=covb, failcode=1) @ \end{center} \end{figure} %---------------------------------------------------------------------------------% % Excluding MSM analysis % %---------------------------------------------------------------------------------% <>= msm.baseline <- subset(sub.baseline, risk.factor != "MSM") msm.baseline$risk.factor <- factor(msm.baseline$risk.factor, levels=c("Heterosexual","IDU","Blood, Other or unknown")) ddist.msm <- datadist(msm.baseline) options(datadist='ddist.msm') #------------------------------------------------------------------------------------------------------------# # Geographic Origin Analyses # #------------------------------------------------------------------------------------------------------------# msm.geo.baseline <- subset(msm.baseline, !is.na(artcc_origin_g.factor)) msm.geo.baseline <- upData(msm.geo.baseline, drop="event_d7") ddist.msm.go <- datadist(msm.geo.baseline) options(datadist='ddist.msm.go') msm.geo.baseline.europe <- subset(msm.geo.baseline, european.clinic.factor == "European site") msm.geo.baseline.europe <- upData(msm.geo.baseline.europe, drop="rna_u") msm.geo.ddist.europe <- datadist(msm.geo.baseline.europe) msm.geo.baseline.north_american <- subset(msm.geo.baseline, european.clinic.factor == "European site") msm.geo.baseline.north_american <- upData(msm.geo.baseline.north_american, drop=c("event_d5","event_d6","event6","event7","haart0_d", "event6_name","event7_name","event8_name", "rna_u")) msm.geo.ddist.north_american <- datadist(msm.geo.baseline.north_american) msm.geographic.origin.analysis <- model.fun(data=msm.geo.baseline,cov="go",type="all") msm.geographic.origin.nontb.analysis <- model.fun(data=msm.geo.baseline,cov="go",type="non_tb") @ <>= #----------------------------------------# # Printing data # #----------------------------------------# # Unadjusted msm.df.tot.unadj <- cbind(msm.geographic.origin.analysis[[1]],msm.geographic.origin.analysis[[2]][,-1]) latex(msm.df.tot.unadj, file="", rowname=NULL, cgroup=c("","Any ADE","ADE or Death"),n.cgroup=c(1,3,3), col.just=c("l","r","r","r||","r","r","r"),landscape=TRUE,size="small",caption="Unadjusted survival models investigating the association of time to any ADE or ADE/Death in the first year after HAART start and geographic origin, excluding MSM subjects (reference for geographic origin is 'WEST'). Results are stratified by cohort.",where="!h", insert.bottom="Overall p-value for geographic origin: Any ADE: p=0.02; ADE/Death: p = 0.15.") # Adjusted msm.df.tot <- cbind(msm.geographic.origin.analysis[[3]],msm.geographic.origin.analysis[[4]]) latex(msm.df.tot, file="", rowname=NULL, cgroup=c("","Any ADE","ADE or Death"),n.cgroup=c(1,3,3), col.just=c("l","r","r","r||","r","r","r"),landscape=TRUE,size="small",caption="Adjusted survival models investigating the association of time to any ADE or ADE/Death in the first year after HAART start and geographic origin adjusting for a priori selected confounders and excluding MSM subjects (reference for geographic origin is 'WEST' and for risk factors is 'Heterosexual'). Results are stratified by cohort.",where="!h",insert.bottom="Total non-linear p-value: any ADE: p < 0.001; ADE/Death: p < 0.001.") # L2FU - unadjusted msm.df.tot.l2fu.unadj <- msm.geographic.origin.analysis[[11]] latex(msm.df.tot.l2fu.unadj, file="", rowname=NULL, landscape=TRUE,size="small",caption="Unadjusted survival model investigating the association of time to L2FU in the first year after HAART start and geographic origin, excluding MSM subjects (reference for geographic origin is 'WEST'). Results are stratified by cohort.", where="!h", insert.bottom="Overall p-value for race: p < 0.001.") # L2FU - adjusted msm.df.tot.l2fu <- msm.geographic.origin.analysis[[12]] latex(msm.df.tot.l2fu, file="", rowname=NULL, landscape=TRUE,size="small",caption="Adjusted survival model investigating the association of time to L2FU in the first year after HAART start and geographic origin adjusting for a priori selected confounders and excluding MSM subjects (reference group for geographic origin is 'WEST' and for risk factors is 'Heterosexual'). Results are stratified by cohort.",where="!h", insert.bottom="Total non-linear p-value: p < 0.001.") @ <>= #----------------------------------------# # Printing data # #----------------------------------------# # Unadjusted msm.df.tot.nontb.unadj <- cbind(msm.geographic.origin.nontb.analysis[[1]],msm.geographic.origin.nontb.analysis[[2]][,-1]) latex(msm.df.tot.nontb.unadj, file="", rowname=NULL, cgroup=c("","Any non-TB ADE","non-TB ADE or Death"), n.cgroup=c(1,3,3), col.just=c("l","r","r","r||","r","r","r"),landscape=TRUE,size="small",caption="Unadjusted survival models investigating the association of time to any non-TB ADE or non-TB ADE/Death in the first year after HAART start and geographic origin, excluding MSM subjects (reference for geographic origin is 'WEST'). Results are stratified by cohort.",where="!h", insert.bottom="Overall p-value for geographic origin: Any ADE: p=0.30; ADE/Death: p = 0.01.") # Adjusted msm.df.tot.nontb <- cbind(msm.geographic.origin.nontb.analysis[[3]],msm.geographic.origin.nontb.analysis[[4]]) latex(msm.df.tot.nontb, file="", rowname=NULL, cgroup=c("","Any non-TB ADE","non-TB ADE or Death"),n.cgroup=c(1,3,3), col.just=c("l","r","r","r||","r","r","r"),landscape=TRUE,size="small",caption="Adjusted survival models investigating the association of time to any non-TB ADE or non-TB ADE/Death in the first year after HAART start and geographic origin adjusting for a priori selected confounders and excluding MSM subjects (reference for geographic origin is 'WEST' and for risk factors is 'Heterosexual'). Results are stratified by cohort.",where="!h",insert.bottom="Total non-linear p-value: any ADE: p < 0.001; ADE/Death: p < 0.001.") @ <>= #------------------------------------------------------------------------------------------------------------# # Native Born Analyses # #------------------------------------------------------------------------------------------------------------# msm.nb.baseline <- subset(msm.baseline, !is.na(native.born.factor)) msm.nb.baseline <- upData(msm.nb.baseline, drop=c("recruit_d","event_d7")) msm.nb.ddist <- datadist(msm.nb.baseline) options(datadist='msm.nb.ddist') msm.nb.baseline.europe <- subset(msm.nb.baseline, european.clinic.factor == "European site") msm.nb.baseline.europe <- upData(msm.nb.baseline.europe, drop="rna_u") msm.nb.ddist.europe <- datadist(msm.nb.baseline.europe) msm.nb.baseline.north_american <- subset(msm.nb.baseline, european.clinic.factor == "European site") msm.nb.baseline.north_american <- upData(msm.nb.baseline.north_american, drop=c("event_d5","event_d6","event6","event7","haart0_d", "event6_name","event7_name","event8_name", "rna_u")) msm.nb.ddist.north_american <- datadist(msm.nb.baseline.north_american) msm.native.born.analysis <- model.fun(data=msm.nb.baseline, cov="nb",type="all") msm.native.born.nontb.analysis <- model.fun(data=msm.nb.baseline, cov="nb",type="non_tb") @ <>= #----------------------------------------# # Printing data # #----------------------------------------# # Unadjusted msm.df.tot.nb.unadj <- c(msm.native.born.analysis[[1]],msm.native.born.analysis[[2]][-1]) msm.df.tot.nb.unadj <- t(as.matrix(msm.df.tot.nb.unadj)) colnames(msm.df.tot.nb.unadj) <- c("Covariate","HR","95\\% CI","P","HR","95\\% CI","P") latex(msm.df.tot.nb.unadj, file="", rowname=NULL, cgroup=c("","Any ADE","ADE or Death"),n.cgroup=c(1,3,3), col.just=c("l","r","r","r||","r","r","r"),landscape=TRUE,size="small",caption="Unadjusted survival models investigating the association of time to any ADE or ADE/Death in the first year after HAART start and native born status, excluding MSM subjects (reference for native born status is 'Native Born'). Results are stratified by cohort.",where="!h",insert.bottom="Overall p-value for native born status: any ADE: p = 0.004; ADE/Death: p = 0.89.") # Adjusted msm.df.tot.nb <- cbind(msm.native.born.analysis[[3]],msm.native.born.analysis[[4]]) latex(msm.df.tot.nb, file="", rowname=NULL, cgroup=c("","Any ADE","ADE or Death"),n.cgroup=c(1,3,3), col.just=c("l","r","r","r||","r","r","r"),landscape=TRUE,size="small",caption="Adjusted survival models investigating the association of time to any ADE or ADE/Death in the first year after HAART start and native born status adjusting for a priori selected confounders and excluding MSM subjects (reference for native born status is 'Native Born' and for risk factors is 'Heterosexual'). Results are stratified by cohort.",where="!h", insert.bottom="Total non-linear p-value: any ADE: p < 0.001; ADE/Death: p < 0.001.") # L2FU - unadjusted msm.df.tot.nb.l2fu.unadj <- msm.native.born.analysis[[11]] latex(msm.df.tot.nb.l2fu.unadj, file="", rowname=NULL, landscape=TRUE,size="small",caption="Unadjusted survival model investigating the association of time to L2FU in the first year after HAART start and native born status, excluding MSM subjects (reference for native born status is 'Yes'). Results are stratified by cohort.", where="!h", insert.bottom="Overall p-value for race: p = 0.27.") # L2FU - adjusted msm.df.tot.nb.l2fu <- msm.native.born.analysis[[12]] latex(msm.df.tot.nb.l2fu, file="", rowname=NULL, landscape=TRUE,size="small",caption="Adjusted survival model investigating the association of time to L2FU in the first year after HAART start and native born status adjusting for a priori selected confounders and excluding MSM subjects (reference group for native born status is 'Native born' and for risk factors is 'Heterosexual'). Results are stratified by cohort.",where="!h", insert.bottom="Total non-linear p-value: p < 0.001.") @ <>= #----------------------------------------# # Printing data # #----------------------------------------# # Unadjusted msm.df.tot.nb.nontb.unadj <- c(msm.native.born.nontb.analysis[[1]],msm.native.born.nontb.analysis[[2]][-1]) msm.df.tot.nb.nontb.unadj <- t(as.matrix(msm.df.tot.nb.nontb.unadj)) colnames(msm.df.tot.nb.nontb.unadj) <- c("Covariate","HR","95\\% CI","P","HR","95\\% CI","P") latex(msm.df.tot.nb.nontb.unadj, file="", rowname=NULL, cgroup=c("","Any non-TB ADE","non-TB ADE or Death"), n.cgroup=c(1,3,3), col.just=c("l","r","r","r||","r","r","r"),landscape=TRUE,size="small",caption="Unadjusted survival models investigating the association of time to any non-TB ADE or non-TB ADE/Death in the first year after HAART start and native born status, excluding MSM subjects (reference for native born status is 'Native Born'). Results are stratified by cohort.",where="!h",insert.bottom="Overall p-value for native born status: any ADE: p = 0.63; ADE/Death: p = 0.03.") # Adjusted msm.df.tot.nb.nontb <- cbind(msm.native.born.nontb.analysis[[3]],msm.native.born.nontb.analysis[[4]]) latex(msm.df.tot.nb.nontb, file="", rowname=NULL, cgroup=c("","Any non-TB ADE","non-TB ADE or Death"),n.cgroup=c(1,3,3), col.just=c("l","r","r","r||","r","r","r"),landscape=TRUE,size="small",caption="Adjusted survival models investigating the association of time to any non-TB ADE or non-TB ADE/Death in the first year after HAART start and native born status adjusting for a priori selected confounders and excluding MSM subjects (reference for native born status is 'Native Born' and for risk factors is 'Heterosexual'). Results are stratified by cohort.",where="!h", insert.bottom="Total non-linear p-value: any ADE: p < 0.001; ADE/Death: p < 0.001.") @ %---------------------------------------------------------------------------------% % Excluding IDU analysis % %---------------------------------------------------------------------------------% <>= idu.baseline <- subset(sub.baseline, risk.factor != "IDU") idu.baseline$risk.factor <- factor(idu.baseline$risk.factor, levels=c("Heterosexual","MSM","Blood, Other or unknown")) ddist.idu <- datadist(idu.baseline) options(datadist='ddist.idu') #------------------------------------------------------------------------------------------------------------# # Geographic Origin Analyses # #------------------------------------------------------------------------------------------------------------# idu.geo.baseline <- subset(idu.baseline, !is.na(artcc_origin_g.factor)) idu.geo.baseline <- upData(idu.geo.baseline, drop="event_d7") ddist.idu.go <- datadist(idu.geo.baseline) options(datadist='ddist.idu.go') idu.geo.baseline.europe <- subset(idu.geo.baseline, european.clinic.factor == "European site") idu.geo.baseline.europe <- upData(idu.geo.baseline.europe, drop="rna_u") idu.geo.ddist.europe <- datadist(idu.geo.baseline.europe) idu.geo.baseline.north_american <- subset(idu.geo.baseline, european.clinic.factor == "European site") idu.geo.baseline.north_american <- upData(idu.geo.baseline.north_american, drop=c("event_d5","event_d6","event6","event7","haart0_d", "event6_name","event7_name","event8_name","rna_u")) idu.geo.ddist.north_american <- datadist(idu.geo.baseline.north_american) idu.geographic.origin.analysis <- model.fun(data=idu.geo.baseline,cov="go",type="all") idu.geographic.origin.nontb.analysis <- model.fun(data=idu.geo.baseline,cov="go",type="non_tb") @ <>= #----------------------------------------# # Printing data # #----------------------------------------# # Unadjusted idu.df.tot.unadj <- cbind(idu.geographic.origin.analysis[[1]],idu.geographic.origin.analysis[[2]][,-1]) latex(idu.df.tot.unadj, file="", rowname=NULL, cgroup=c("","Any ADE","ADE or Death"),n.cgroup=c(1,3,3), col.just=c("l","r","r","r||","r","r","r"),landscape=TRUE,size="small",caption="Unadjusted survival models investigating the association of time to any ADE or ADE/Death in the first year after HAART start and geographic origin, excluding IDU subjects (reference for geographic origin is 'WEST'). Results are stratified by cohort.", where="!h",insert.bottom="Overall p-value for geographic origin: Any ADE: p < 0.001; ADE/Death: p = 0.10.") # Adjusted idu.df.tot <- cbind(idu.geographic.origin.analysis[[3]],idu.geographic.origin.analysis[[4]]) latex(idu.df.tot, file="", rowname=NULL, cgroup=c("","Any ADE","ADE or Death"),n.cgroup=c(1,3,3), col.just=c("l","r","r","r||","r","r","r"),landscape=TRUE,size="small",caption="Adjusted survival models investigating the association of time to any ADE or ADE/Death in the first year after HAART start and geographic origin adjusting for a priori selected confounders and excluding IDU subjects (reference for geographic origin is 'WEST' and for risk factors is 'Heterosexual'). Results are stratified by cohort.",where="!h",insert.bottom="Total non-linear p-value: any ADE: p < 0.001; ADE/Death: p < 0.001.") # L2FU - unadjusted idu.df.tot.l2fu.unadj <- idu.geographic.origin.analysis[[11]] latex(idu.df.tot.l2fu.unadj, file="", rowname=NULL, landscape=TRUE,size="small",caption="Unadjusted survival model investigating the association of time to L2FU in the first year after HAART start and geographic analysis, excluding IDU subjects (reference for geographic origin is 'WEST'). Results are stratified by cohort.", where="!h", insert.bottom="Overall p-value for race: p < 0.001.") # L2FU - adjusted idu.df.tot.l2fu <- idu.geographic.origin.analysis[[12]] latex(idu.df.tot.l2fu, file="", rowname=NULL, landscape=TRUE,size="small",caption="Adjusted survival model investigating the association of time to L2FU in the first year after HAART start and geographic origin adjusting for a priori selected confounders and excluding IDU subjects (reference group for geographic origin is 'WEST' and for risk factors is 'Heterosexual'). Results are stratified by cohort.",where="!h", insert.bottom="Total non-linear p-value: p < 0.001.") @ <>= #----------------------------------------# # Printing data # #----------------------------------------# # Unadjusted idu.df.tot.unadj.nontb <- cbind(idu.geographic.origin.nontb.analysis[[1]],idu.geographic.origin.nontb.analysis[[2]][,-1]) latex(idu.df.tot.unadj.nontb, file="", rowname=NULL, cgroup=c("","Any non-TB ADE","non-TB ADE or Death"), n.cgroup=c(1,3,3), col.just=c("l","r","r","r||","r","r","r"),landscape=TRUE,size="small",caption="Unadjusted survival models investigating the association of time to any non-TB ADE or non-TB ADE/Death in the first year after HAART start and geographic origin, excluding IDU subjects (reference for geographic origin is 'WEST'). Results are stratified by cohort.", where="!h",insert.bottom="Overall p-value for geographic origin: Any ADE: p=0.07; ADE/Death: p = 0.07.") # Adjusted idu.df.tot.nontb <- cbind(idu.geographic.origin.nontb.analysis[[3]],idu.geographic.origin.nontb.analysis[[4]]) latex(idu.df.tot.nontb, file="", rowname=NULL, cgroup=c("","Any non-TB ADE","non-TB ADE or Death"),n.cgroup=c(1,3,3), col.just=c("l","r","r","r||","r","r","r"),landscape=TRUE,size="small",caption="Adjusted survival models investigating the association of time to any non-TB ADE or non-TB ADE/Death in the first year after HAART start and geographic origin adjusting for a priori selected confounders and excluding IDU subjects (reference for geographic origin is 'WEST' and for risk factors is 'Heterosexual'). Results are stratified by cohort.",where="!h",insert.bottom="Total non-linear p-value: any ADE: p < 0.001; ADE/Death: p < 0.001.") @ <>= #------------------------------------------------------------------------------------------------------------# # Native Born Analyses # #------------------------------------------------------------------------------------------------------------# idu.nb.baseline <- subset(idu.baseline, !is.na(native.born.factor)) idu.nb.baseline <- upData(idu.nb.baseline, drop=c("event_d7","event_d8")) idu.nb.ddist <- datadist(idu.nb.baseline) options(datadist='idu.nb.ddist') idu.nb.baseline.europe <- subset(idu.nb.baseline, european.clinic.factor == "European site") idu.nb.baseline.europe <- upData(idu.nb.baseline.europe, drop="rna_u") idu.nb.ddist.europe <- datadist(idu.nb.baseline.europe) idu.nb.baseline.north_american <- subset(idu.nb.baseline, european.clinic.factor == "European site") idu.nb.baseline.north_american <- upData(idu.nb.baseline.north_american, drop=c("event_d5","event_d6","event6","event7","haart0_d", "event6_name","event7_name","event8_name","rna_u")) idu.nb.ddist.north_american <- datadist(idu.nb.baseline.north_american) idu.native.born.analysis <- model.fun(data=idu.nb.baseline, cov="nb",type="all") idu.native.born.nontb.analysis <- model.fun(data=idu.nb.baseline, cov="nb",type="non_tb") ## Need Bryan's input on this @ <>= #----------------------------------------# # Printing data # #----------------------------------------# # Unadjusted idu.df.tot.nb.unadj <- c(idu.native.born.analysis[[1]],idu.native.born.analysis[[2]][-1]) idu.df.tot.nb.unadj <- t(as.matrix(idu.df.tot.nb.unadj)) colnames(idu.df.tot.nb.unadj) <- c("Covariate","HR","95\\% CI","P","HR","95\\% CI","P") latex(idu.df.tot.nb.unadj, file="", rowname=NULL, cgroup=c("","Any ADE","ADE or Death"),n.cgroup=c(1,3,3), col.just=c("l","r","r","r||","r","r","r"),landscape=TRUE,size="small",caption="Unadjusted survival models investigating the association of time to any ADE or ADE/Death in the first year after HAART start and native born status, excluding IDU subjects (reference for native born status is 'Native Born'). Results are stratified by cohort.",where="!h",insert.bottom="Overall p-value for native born status: any ADE: p < 0.001; ADE/Death: p = 0.12.") # Adjusted idu.df.tot.nb <- cbind(idu.native.born.analysis[[3]],idu.native.born.analysis[[4]]) latex(idu.df.tot.nb, file="", rowname=NULL, cgroup=c("","Any ADE","ADE or Death"),n.cgroup=c(1,3,3), col.just=c("l","r","r","r||","r","r","r"),landscape=TRUE,size="small",caption="Adjusted survival models investigating the association of time to any ADE or ADE/Death in the first year after HAART start and native born status adjusting for a priori selected confounders and excluding IDU subjects (reference for native born status is 'Native Born' and for risk factors is 'Heterosexual'). Results are stratified by cohort.",where="!h",insert.bottom="Total non-linear p-value: any ADE: p < 0.001; ADE/Death: p < 0.001.") # L2FU - unadjusted idu.df.tot.nb.l2fu.unadj <- idu.native.born.analysis[[11]] latex(idu.df.tot.nb.l2fu.unadj, file="", rowname=NULL, landscape=TRUE,size="small",caption="Unadjusted survival model investigating the association of time to L2FU in the first year after HAART start and native born status, excluding IDU subjects (reference for native born status is 'Yes'). Results are stratified by cohort.", where="!h", insert.bottom="Overall p-value for race: p < 0.001.") # L2FU - adjusted idu.df.tot.nb.l2fu <- idu.native.born.analysis[[12]] latex(idu.df.tot.nb.l2fu, file="", rowname=NULL, landscape=TRUE,size="small",caption="Adjusted survival model investigating the association of time to L2FU in the first year after HAART start and native born status adjusting for a priori selected confounders and excluding IDU subjects (reference group for native born status is 'Native born' and for risk factors is 'Heterosexual'). Results are stratified by cohort.",where="!h", insert.bottom="Total non-linear p-value: p < 0.001.") @ <>= #----------------------------------------# # Printing data # #----------------------------------------# # Unadjusted idu.df.tot.nb.unadj.nontb <- t(as.matrix(c(idu.native.born.nontb.analysis[[1]],idu.native.born.nontb.analysis[[2]][,-1]))) colnames(idu.df.tot.nb.unadj.nontb) <- c("Covariate",rep(c("Hazard Ratio","95\\% CI","P"),times=2)) latex(idu.df.tot.nb.unadj.nontb, file="", rowname=NULL, cgroup=c("","Any non-TB ADE","non-TB ADE or Death"), n.cgroup=c(1,3,3), col.just=c("l","r","r","r||","r","r","r"),landscape=TRUE,size="small",caption="Unadjusted survival models investigating the association of time to any non-TB ADE or non-TB ADE/Death in the first year after HAART start and native born status, excluding IDU subjects (reference for native born status is 'Yes'). Results are stratified by cohort.", where="!h",insert.bottom="Overall p-value for native born status: Any ADE: p=0.33; ADE/Death: p=0.33.") # Adjusted idu.df.tot.nb.nontb <- cbind(idu.native.born.nontb.analysis[[3]],idu.native.born.nontb.analysis[[4]]) latex(idu.df.tot.nb.nontb, file="", rowname=NULL, cgroup=c("","Any non-TB ADE","non-TB ADE or Death"),n.cgroup=c(1,3,3), col.just=c("l","r","r","r||","r","r","r"),landscape=TRUE,size="small",caption="Adjusted survival models investigating the association of time to any non-TB ADE or non-TB ADE/Death in the first year after HAART start and native born status adjusting for a priori selected confounders and excluding IDU subjects (reference for native born status is 'Yes' and for risk factors is 'Heterosexual'). Results are stratified by cohort.",where="!h",insert.bottom="Total non-linear p-value: any ADE: p < 0.001; ADE/Death: p < 0.001.") @ %---------------------------------------------------------------------------------% % Including only Heterosexuals % % Based on comments to manuscript from ART-CC community % %---------------------------------------------------------------------------------% <>= hetero.baseline <- subset(sub.baseline, risk.factor == "Heterosexual") hetero.baseline <- upData(hetero.baseline, drop='event_d7') ddist.hetero <- datadist(hetero.baseline) options(datadist='ddist.hetero') #------------------------------------------------------------------------------------------------------------# # Native Born Analyses # #------------------------------------------------------------------------------------------------------------# hetero.nb.baseline <- subset(hetero.baseline, !is.na(native.born.factor)) hetero.nb.baseline <- upData(hetero.nb.baseline, drop=c("recruit_d")) hetero.nb.ddist <- datadist(hetero.nb.baseline) options(datadist='hetero.nb.ddist') hetero.nb.baseline.europe <- subset(hetero.nb.baseline, european.clinic.factor == "European site") hetero.nb.baseline.europe <- upData(hetero.nb.baseline.europe, drop="rna_u") hetero.nb.ddist.europe <- datadist(hetero.nb.baseline.europe) hetero.nb.baseline.north_american <- subset(hetero.nb.baseline, european.clinic.factor == "European site") hetero.nb.baseline.north_american <- upData(hetero.nb.baseline.north_american, drop=c("event_d5","event_d6","event6","event7","haart0_d", "event6_name","event7_name","event8_name", "rna_u")) hetero.nb.ddist.north_american <- datadist(hetero.nb.baseline.north_american) data <- hetero.nb.baseline var. <- "native.born.factor" ddist <- datadist(data) # For sub-group analyses in adjusted models data.europe <- subset(data, european.clinic.factor == "European site") data.north_american <- subset(data, european.clinic.factor == "North American site") data.europe <- upData(data.europe, drop=c("rna_u","event_d7")) ddist.europe <- datadist(data.europe) data.north_american <- upData(data.north_american, drop=c("event_d5","event_d6","event6","event7","haart0_d", "event6_name","event7_name","event8_name")) ddist.north_american <- datadist(data.north_american) #-----------------------------# # Time to any ADE # # in the first year # #-----------------------------# Srv.ade <- Surv(data$followup_time_first_year,data$ade_first_year_of_haart) Srv.ade.europe <- Surv(data.europe$followup_time_first_year, data.europe$ade_first_year_of_haart) Srv.ade.north_american <- Surv(data.north_american$followup_time_first_year, data.north_american$ade_first_year_of_haart) #-----------------------------# # Time to ADE or death # # in the first year # #-----------------------------# Srv.ade.dth <- Surv(data$followup_time_first_year,data$ade_or_death_first_year_of_haart) Srv.ade.dth.europe <- Surv(data.europe$followup_time_first_year,data.europe$ade_or_death_first_year_of_haart) Srv.ade.dth.north_american <- Surv(data.north_american$followup_time_first_year, data.north_american$ade_or_death_first_year_of_haart) #-----------------------------# # Time to death # # in the first year # #-----------------------------# Srv.dth <- Surv(data$followup_time_first_year,data$death_first_year_of_haart) # Adjusted predictors.adj <- c(var.,"gender.factor","rcs(cd4_t0_corrected,4)","rcs(lrna_t0_corrected,3)", "rcs(age,3)","pi_based.factor","nnrti_based.factor", "rcs(haartyr,3)","stagec.factor","strat(cohort)") predictors2.adj <- c(var.,"gender.factor","rcs(cd4_t0_corrected,4)","rcs(lrna_t0_corrected,3)", "rcs(age,3)","pi_based.factor","nnrti_based.factor", "rcs(haartyr,3)","stagec.factor","strata(cohort)") # Any ADE fmla <- as.formula(paste("Srv.ade ~ ",paste(predictors.adj,collapse="+"))) options(datadist='ddist') any.ade.adj <- cph(fmla, data=data) summ.50 <- summary(any.ade.adj, native.born.factor = "Yes", gender.factor = "Male", cd4_t0_corrected = c(200,50), lrna_t0_corrected = c(5,4), age = c(40,20), pi_based.factor = "No", nnrti_based.factor = "No", haartyr = c(2001,1998), stagec.factor = "No") ade.p <- anova(any.ade.adj) # ADE or Death fmla <- as.formula(paste("Srv.ade.dth ~ ",paste(predictors.adj, collapse="+"))) fmla.europe <- as.formula(paste("Srv.ade.dth.europe ~ ",paste(predictors.adj, collapse="+"))) fmla.north_american <- as.formula(paste("Srv.ade.dth.north_american ~ ",paste(predictors.adj, collapse="+"))) options(datadist='ddist') ade.or.dth.adj <- cph(fmla, data=data) summ.50.adedth <- summary(ade.or.dth.adj, native.born.factor = "Yes", gender.factor = "Male", cd4_t0_corrected = c(200,50), lrna_t0_corrected = c(5,4), age = c(40,20), pi_based.factor = "No", nnrti_based.factor = "No", haartyr = c(2001,1998), stagec.factor = "No") ade.dth.p <- anova(ade.or.dth.adj) # Death fmla <- as.formula(paste("Srv.dth ~ ",paste(predictors.adj, collapse="+"))) options(datadist='ddist') dth.adj <- cph(fmla, data=data) summ.50.dth <- summary(dth.adj, native.born.factor = "Yes", gender.factor = "Male", cd4_t0_corrected = c(200,50), lrna_t0_corrected = c(5,4), age = c(40,20), pi_based.factor = "No", nnrti_based.factor = "No", haartyr = c(2001,1998), stagec.factor = "No") dth.p <- anova(dth.adj) @ \end{document}