tx <- ifelse(is.na(dem$trx),as.character(dem$trx.seq), as.character(dem$trx)) tx <- ifelse(tx %in% c('P/P','PLACEBO'),'placebo','drug') names(tx) <- dem$pid abl <- function(dframe) { a <- sapply(dframe, label) abbreviate(ifelse(a=='', names(dframe), a), minlength=8) } #---------------------------------------------------------------------- ## ## AE analysis (05162001).ssc ## ignore <- which(names(aesumm) %in% c('pid','week')) lbls <- abl(aesumm)[-ignore] trt <- tx[aesumm$pid] setps(varclus.ae1, h=5, sublines=3, toplines=1) par(mfrow=c(1,2)) par(mar=c(4,4,2,1)) par(mgp=c(2.5,.5,0)) par('mar');par('mgp') w <- 1 for(tr in c('placebo','drug')) { v <- varclus(~ae045 + ae077 + ae104 + ae109 + ae126 + ae224 + ae392, sim='bothpos', data=aesumm, subset=week==w & trt==tr) plot(v, ylim=c(0,.01), labels=lbls) title(paste('Week',w,tr)) } dev.off() wks <- sort(unique(aesumm$week)) s <- array(NA, c(length(lbls),length(lbls),length(wks))) setps(multsim.ae,h=7) opar <- par(mar=c(0,2.1,0,0)) for(tr in c('placebo','drug')) { for(i in 1:length(wks)) { w <- wks[i] s[,,i] <- varclus(~ae045 + ae077 + ae104 + ae109 + ae126 + ae224 + ae392, sim='ccbothpos', data=aesumm, subset=week==w & trt==tr)$sim } plotMultSim(s, wks, vname=lbls, slimds=T, add=tr=='drug', slim=c(0,1), lty=1+(tr=='placebo')) } dev.off() #----------------------------------------------------------------- ## ## Lab analysis (05152001).ssc ## na <- naclus(labsumm) naplot(na,which='na per var') numna <- function(x) sum(is.na(x)) for(w in sort(unique(labsumm$week))) { cat(w,'\n') print(sapply(labsumm,numna)) } #lab017 lab024 lab025 lab029 lab009 lab019 lab028 lab011 lab027 lab009 lab012 #missing a lot wks <- c(8) setps(varclus.lab8, h=5, sublines=3, toplines=1) par(mfrow=c(length(wks),2)) for(w in wks) { lr <- labsumm[labsumm$week==w,-(1+c(17,24,25,29,19,28,11,27,9,12))] ignore <- which(names(lr) %in% c('pid','week')) trt <- tx[lr$pid] for(tr in c('placebo','drug')) { v <- varclus(~., data=lr[,-ignore], subset=trt==tr) plot(v, ylim=c(0,1.0), labels=abl(lr)[-ignore]) #, ylab='rho^2') title(paste('Week',w,tr)) } } dev.off() #------------------------------------------------------------------ ## ## Lab analysis - plotMultSim (05152001).ssc ## wks <- sort(unique(labsumm$week)) wks <- wks[wks != 1] #lbls <- abl(labsumm)[1+c(1:8,10,13:16,18,20:23,26,30:34)] lbls <- abl(labsumm)[1+c(3,7,8,13,14,16,20,21,23,30,31,32,34)] trt <- tx[labsumm$pid] s <- array(NA, c(length(lbls),length(lbls),length(wks))) setps(multsim.lab, h=7) opar <- par(mar=c(0,2,0,0)) par('mar') for(tr in c('placebo','drug')) { for(i in 1:length(wks)) { w <- wks[i] s[,,i] <- varclus(~lab003 + lab007 + lab008 + lab013 + lab014 + lab016 + lab020 + lab021 + lab023 + lab030 + lab031 + lab032 + lab034, data=labsumm, subset=week==w & trt==tr)$sim } plotMultSim(s, wks, vname=lbls, add=tr=='drug', slim=c(0,1), lty=1+(tr=='placebo')) } par(opar) dev.off() #-------------------------------------------------------------------- ## ## ECG analysis - plotMultSim (05162001).ssc ## #ecgtemp <- all[,Cs(pid,week,ecg002,ecg003,ecg004,ecg005,ecg007,ecg008)] ignore <- which(names(ecgsumm) %in% c('pid','week')) lbls <- abl(ecgsumm[,Cs(ecg002,ecg003,ecg004,ecg005,ecg007,ecg008)]) trt <- tx[ecgsumm$pid] setps(varclus.ecg8, h=5, sublines=3, toplines=1) par(mfrow=c(1,2)) #par(mar=c(4,4,2,1)) #par(mgp=c(2.5,.5,0)) w <- 8 for(tr in c('placebo','drug')) { v <- varclus(~ ecg002 + ecg003 + ecg004 + ecg005 + ecg007 + ecg008, data=ecgsumm, subset=week==w & trt==tr) plot(v, labels=lbls) title(paste('Week',w,tr)) } dev.off() wks <- sort(unique(ecgsumm$week)) wks <- wks[wks != 1] s <- array(NA, c(length(lbls),length(lbls),length(wks))) opar <- par(mar=c(0,0,4.1,0)) for(tr in c('placebo','drug')) { for(i in 1:length(wks)) { w <- wks[i]; cat(w) s[,,i] <- varclus(~ecg002 + ecg003 + ecg004 + ecg005 + ecg007 + ecg008, data=ecgsumm, subset=week==w & trt==tr)$sim } plotMultSim(s, wks, vname=lbls, add=tr=='drug', slim=c(0,1), lty=1+(tr=='placebo')) } # No trends seen, don't use graph #------------------------------------------------------------------ ## ## All data analysis (05162001).ssc ## all.tx <- ifelse(is.na(all$trx),as.character(all$trx.seq), as.character(all$trx)) all.tx <- factor(ifelse(all.tx %in% c('P/P','PLACEBO'),'placebo','drug')) drug <- 1*(all.tx=='drug') library(rpart) WBC <- all$lab034 HR <- all$ecg008 uncorr.qt <- all$ecg007 nausea <- factor(all$ae126,0:1,c('N','Y')) diarrhea <- factor(all$ae077,0:1,c('N','Y')) creatinine <- all$lab016 platelets <- all$lab030 protein <- all$lab031 lymphocytes <- all$lab003 BUN <- all$lab014 axis <- all$ecg002 r <- rpart(ae392 ~ all.tx + age + sex + race + height + weight + bmi + pack.yrs + smk.stat + lab003 + lab007 + lab008 + lab013 + lab014 + lab016 + lab020 + lab021 + lab023 + lab030 + lab031 + lab032 + WBC + ecg001 + ecg002 + ecg003 + ecg004 + ecg005 + ecg006 + ecg007 + HR, data=all, subset=week==8, control=rpart.control(minbucket=25)) plot(r, main = "Distribution of AE COAD at Week 8") text(r) post(r, file='rpart.coad.week8.ps', title='Chronic Obstructive Airways Disease\nWeek 8', horizontal = F, width = 4, height = 4, pointsize=10, maximize = T, onefile = F, print.it = F) r <- rpart(ae392 ~ week + all.tx + age + sex + race + height + weight + bmi + pack.yrs + smk.stat + lab003 + lab007 + lab008 + lab013 + lab014 + lab016 + lab020 + lab021 + lab023 + lab030 + lab031 + lab032 + WBC + ecg001 + ecg002 + ecg003 + ecg004 + ecg005 + ecg006 + uncorr.qt + ecg008, control=rpart.control(minbucket=25,cp=.002), data=all) plot(r, main = "Distribution of AE COAD") text(r) post(r, file='rpart.coad.ps', title='Chronic Obstructive Airways Disease\nAll Weeks', horizontal = F, width = 4, height = 4, pointsize=9, maximize = T, onefile = F, print.it = F) r <- rpart(drug ~ week + lymphocytes + lab007 + lab008 + lab013 + BUN + creatinine + lab020 + lab021 + lab023 + platelets + protein + lab032 + WBC + ecg001 + axis + ecg003 + ecg004 + ecg005 + ecg006 + uncorr.qt + HR + ae045 + diarrhea + ae104 + ae109 + nausea + ae224 + ae392, control=rpart.control(minbucket=100,cp=.0013), data=all) post(r, file='rpart.drug.ps', title='Regression Tree for Prob[drug]', horizontal = F, width = 6, height = 7, pointsize=7, maximize = T, onefile = F, print.it = F, compress=F) # slight mod of plot.rpart to make lines fainter pr <- function(tree, uniform = F, branch = 1, compress = F, nspace, margin = 0, minbranch = 0.3, ...) { if(!inherits(tree, "rpart")) stop("Not an rpart object") if(compress & missing(nspace)) nspace <- branch if(!compress) nspace <- -1 #means no compression dev <- dev.cur() if(dev == 1) dev <- 2 assign(paste(".rpart.parms", dev, sep = "."), list(uniform = uniform, branch = branch, nspace = nspace, minbranch = minbranch), frame = 0) #define the plot region temp <- rpartco(tree) x <- temp$x y <- temp$y temp1 <- range(x) + diff(range(x)) * c( - margin, margin) temp2 <- range(y) + diff(range(y)) * c( - margin, margin) plot(temp1, temp2, type = "n", axes = F, xlab = "", ylab = "", ...) # Draw a series of horseshoes or V's, left son, up, down to right son # NA's in the vector cause lines() to "lift the pen" node <- as.numeric(row.names(tree$frame)) temp <- rpart.branch(x, y, node, branch) if(branch > 0) text(x[1], y[1], "|") lines(c(temp$x), c(temp$y), col=.5, lwd=.65) invisible(list(x = x, y = y)) } setps(rpart.drug, h=5, toplines=1) pr(r, uniform = F, branch = 0.1, compress = F, margin = 0.1) text(r, all = TRUE, use.n = TRUE, fancy = F, digits = 5, pretty = 0) title('Regression Tree for Prob[drug]') dev.off() headache <- all$ae045 ab.pain <- all$ae104 dyspepsia <- all$ae109 upper.resp.infect <- all$ae224 coad <- all$ae392 albumin <- all$lab007 alk.phos <- all$lab008 bilirubin <- all$lab013 glucose <- all$lab020 hematocrit <- all$lab021 potassium <- all$lab023 RBC <- all$lab032 atrial.rate<- all$ecg001 corr.qt <- all$ecg003 pr <- all$ecg004 qrs <- all$ecg005 rr <- all$ecg006 f <- lrm(drug ~ week + rcs(lymphocytes,4) + rcs(albumin,4) + rcs(alk.phos,4) + rcs(bilirubin,4) + rcs(BUN,4) + rcs(creatinine,4) + rcs(glucose,4) + rcs(hematocrit,4) + rcs(potassium,4) + rcs(platelets,4) + rcs(protein,4) + rcs(RBC,4) + rcs(WBC,4) + rcs(atrial.rate,4) + rcs(axis,4) + rcs(corr.qt,4) + rcs(pr,4) + rcs(qrs,4) + rcs(rr,4) + headache + diarrhea + ab.pain + dyspepsia + nausea + upper.resp.infect + coad, data=all, subset=week!=1 & week<=12) # Used 2617 placebo 5103 drug records non-NA # Linear model Obs Max Deriv Model L.R. d.f. P C Dxy Gamma 7720 1.732445e-08 138.0048 27 1.110223e-16 0.5738161 0.1476323 0.1493591 Tau-a R2 Brier 0.06617018 0.0245338 0.2203164 # Spline model Obs Max Deriv Model L.R. d.f. P C Dxy Gamma Tau-a 7720 3.19509e-08 257.1331 65 0 0.6074139 0.2148279 0.2163361 0.09628788 R2 Brier 0.04536197 0.2168652 setps(anova.drug,h=5,toplines=1) plot(anova(f)) title('Independent Predictors of Drug') dev.off() d <- data.frame(lymphocytes,albumin,alk.phos,hematocrit,bilirubin, creatinine,bilirubin,WBC,RBC,platelets, corr.qt,qrs,all.tx)[all$week==8,] setps(ecdf.wk8,h=6,pointsize=12) par(mfrow=c(3,4)) ecdf(d, group=d$all.tx, label.curve=F) dev.off() rm(WBC,HR,uncorr.qt,nausea,diarrhea,creatinine,platelets,protein,lymphocytes, BUN, axis, pr, headache, ab.pain, dyspepsia, upper.resp.infect, coad, albumin, alk.phos, bilirubin, hematocrit, potassium, RBC, atrial.rate, corr.qt, pr, qrs, rr, glucose, d)