# This function meshes the current data with the trt dependent data frame # inserting the appropriate treatment at the appropriate age load('d2.rda') sub.x <- d2 load('trt.rda') repeating.cols <- c("male","sex", "ethnicity", "black", "white", "deceased", "age_at_death","immedcodaids", "immedcod_chorus", "probable_route_of_infection", "idu", "hetero", "msm", "missing_route", "other_route", "nbr_office_visits", "age_at_first_visit", "lost_to_fu") rep.vals <- lapply(split(sub.x, sub.x$pid), function(x) x[1,c("pid", repeating.cols)]) trt$pid <- as.character(trt$pid) trt <- trt[trt$pid %in% unique(sub.x$pid),] trt$age.rx.start <- as.integer(round(trt$age.rx.start*365.25)) trt$age.rx.end <- as.integer(round(trt$age.rx.end*365.25)) sub.x$age <- as.integer(round(sub.x$age*365.25)) trt <- trt[order(trt$pid,trt$age.rx.start,trt$age.rx.end, na.last=FALSE), c("pid", "age.rx.start", "age.rx.end", "ccr5", "days_duration", "fi", "fhaart", "haart", "haart_before_first_visit", "nbr_drugs_in_regimen", "nnrti", "nrti", "pi", "pi_boosted", "regimen","art.0")] sub.x <- sub.x[order(sub.x$pid, sub.x$age),!names(sub.x) %in% c("fhaart","haart")] trt.cols <- names(trt)[!names(trt) %in% c("age.rx.start", "age.rx.end")] subx.cols <- names(sub.x)[!names(sub.x) %in% c("fhaart","haart")] dropped.cols <- !(names(trt) %in% c("pid")) zero.trt <- trt[0, !(names(trt) %in% c("pid", "age.rx.start", "age.rx.end"))] zero.trt[1, c(which(names(zero.trt) != "regimen"), which(names(zero.trt) == "regimen"))] <- c(rep(list(0), times=length(zero.trt)-1L), list("")) # zero.trt[1, c(which(names(zero.trt) != "regimen"), which(names(zero.trt) == # "regimen"))] <- c(rep(0, times=length(zero.trt)-1L), "") # # # # zero.trt$ccr5 <- as.numeric(zero.trt$ccr5) # zero.trt$days_duration <- as.numeric(zero.trt$days_duration) # zero.trt$fi <- as.numeric(zero.trt$fi) # zero.trt$fhaart <- as.numeric(zero.trt$fhaart) # zero.trt$haart <- as.numeric(zero.trt$haart) # zero.trt$haart_before_first_visit <- # as.numeric(zero.trt$haart_before_first_visit) # zero.trt$nbr_drugs_in_regimen <- as.numeric(zero.trt$nbr_drugs_in_regimen) # zero.trt$nnrti <- as.numeric(zero.trt$nnrti) # zero.trt$nrti <- as.numeric(zero.trt$nrti) # zero.trt$pi <- as.numeric(zero.trt$pi) # zero.trt$pi_boosted <- as.numeric(zero.trt$pi_boosted) # zero.trt$art.0 <- as.numeric(zero.trt$art.0) rownames(zero.trt) <- "" func2 <- function(trt) { len <- length(trt$age.rx.end) if(!is.na(trt$age.rx.end[len])) { trt <- rbind(trt, cbind(pid=trt$pid[1], age.rx.start=trt$age.rx.end[len] + 1, age.rx.end=NA, zero.trt)) } if(!is.na(trt$age.rx.start[1])) { trt <- rbind(cbind(pid=trt$pid[1], age.rx.start=NA, age.rx.end=trt$age.rx.start[1] - 1, zero.trt), trt) } len <- length(trt$age.rx.end) delta <- trt$age.rx.start[-1] - trt$age.rx.end[-len] extra <- cbind(pid=trt$pid[-len], age.rx.start = trt$age.rx.end[-len] + 1, age.rx.end=trt$age.rx.end[-len] + delta - 1, zero.trt[rep.int(1, times=length(delta)),])[delta > 1,] if(nrow(extra) > 0) { trt <- rbind(trt,extra) trt <- trt[order(trt$age.rx.start, na.last=FALSE),] } trt$days_duration <- trt$age.rx.end - trt$age.rx.start + 1 return(trt) } s.trt <- lapply(split(trt, trt$pid), func2) func <- function(j) { subs <- s.trt[[j$pid[1]]] if(length(subs) == 0) { cat("No Treament found for ",j$pid[1],"\n",sep='') return( cbind(j, zero.trt)) } subs <- subs[, dropped.cols] ntrt <- nrow(subs) start <- subs$age.rx.start end <- subs$age.rx.end if(is.na(end[ntrt])) { end[ntrt] <- Inf } if(is.na(start[1])) { start[1] <- -Inf } inds <- sapply(j$age, function(j, start, end){ if(length(ans <- which(j >= start & j <= end)) == 0) NA else ans }, start, end) if(any(is.na(inds))) stop("an NA inds found") ans <- subs[inds, ] return(cbind(j, ans)) } ans <- unsplit(lapply(split(sub.x[c("age","pid")], sub.x$pid), func), sub.x$pid) tmp.sub <- merge(sub.x[,subx.cols], ans, by=c("pid","age"), all=TRUE) new.trt <- do.call(rbind, args=s.trt) new.trt <- new.trt[!(!is.na(new.trt$days_duration) & new.trt$days_duration <= 30 & new.trt$regimen == " "),] tmp.trt <- merge(cbind(age = new.trt$age.rx.start[!is.na(new.trt$age.rx.start)], new.trt[!is.na(new.trt$age.rx.start),], start=1), cbind(age = new.trt$age.rx.end[!is.na(new.trt$age.rx.end)], new.trt[!is.na(new.trt$age.rx.end),], end=1), all=TRUE) new.sub <- merge(cbind(tmp.sub, clinic=1), tmp.trt, all=TRUE) new.sub$clinic[is.na(new.sub$clinic)] <- 0 new.sub$start[is.na(new.sub$start)] <- 0 new.sub$end[is.na(new.sub$end)] <- 0 new.sub <- new.sub[!(!duplicated(new.sub$pid) & new.sub$end == 1 & new.sub$regimen == " " & new.sub$clinic == 0) & !(!duplicated(new.sub$pid, fromLast=TRUE)& new.sub$regimen==" " & new.sub$start==1 & new.sub$clinic == 0),] for(vals in rep.vals) { new.sub[new.sub$pid == vals$pid, repeating.cols] <- vals[repeating.cols] } total<- cumsum(new.sub$haart) ind <- which(!duplicated(new.sub$pid)) haart.sum <- total - rep(total[ind]-new.sub$haart[ind], times=diff(c(ind, length(total)+1))) new.sub$fhaart <- as.integer(haart.sum == 1) new.sub$post.fhaart <- as.integer(haart.sum > 0)