## Code to conduct analyses described in our Epidemiology paper using a simulated dataset. ## ## Users should also download ODSCodeFunctions.R and FullCohort.RData ## ## This script conducts analyses of the full cohort. It also conducts analyses ## for an outcome dependent sample with the three methods described in the manuscript, ## i.e., ascertainment corrected maximum likelihood (ACML), weighted likeihoood (WL), and a ## multiple imputation extension of the ACML analysis ## Please search in the following places for updated code and examples: ## http://biostat.mc.vanderbilt.edu/wiki/bin/edit/Main/ODSandLDA ## https://github.com/mercaldo/MMLB ## This code will soon be available on the comprehensive R network with an accompanying ## vignette to describe it further rm(list=ls()) ## Install all needed R packages #install.packages("devtools") #library(devtools) #install_github('mercaldo/MMLB', force=TRUE) #install.packages("MASS") #install.package("mitools") ## Load R libraries library(MMLB) library(MASS) library(mitools) ## Source functions used to conduct the MI analysis source("ODSCodeFunctions.R") ## Load Full Cohort data that contains 5000 individuals ## The dataframe is named FullCohort. load("FullCohort.RData") ## Look at the data head(FullCohort, n=50) summary(FullCohort) # Perform full cohort analysis fc.fit <- mm(Y~time*Xe+C,data=FullCohort, lv.formula=~1,t.formula=~1, id=id,q=50, verbose=FALSE) summary(fc.fit) ## Unique ids in each sampling stratum ## The outcome is relatively rare and so proportionately few subjects are in stratum 3. IDinSS <- tapply(FullCohort$id, FullCohort$ss, unique) NperStratum <- c(unlist(lapply(IDinSS, length))) NperStratum ## Expected / desired ods sample size per stratum and induced sampling probabilities NSampledPerStratum <- c(100, 1000, 100) SampProbs <- NSampledPerStratum / NperStratum SampProbs ## Sample from the full cohort to create the subsample SamplingList <- data.frame(id=c(unlist(IDinSS)), SampProbi=rep(SampProbs, NperStratum)) SamplingList$Sampled <- rbinom(5000, 1, SamplingList$SampProbi) SamplingList <- SamplingList[order(SamplingList$id),] ## Merge the full cohort with the sampling list and set unsampled subjects' (Sampled=0) Xe value set to NA FullCohort.na <- merge(SamplingList, FullCohort, by="id") FullCohort.na$Xe[FullCohort.na$Sampled==0] <- NA ## Keep those who were sampled to create the ODS dataset odsdat <- FullCohort.na[FullCohort.na$Sampled==1,] # ODS analysis using ascertaintment corrected (maximum) likelihood # We are fitting this with a marginalized transition and latent variable model (Schildcrout and Heagerty, 2007, Biometrics) fit.acml <- mm(Y~time*Xe+C, dat=odsdat, id=id, lv.formula=~1, t.formula=~1, cond.like=TRUE, samp.probs=SampProbs, iter.lim=1000, verbose=FALSE, q=20, return_args=TRUE) summary(fit.acml) # ODS analysis using the weighted likelihood # Important note: Do not use model based covariances (mod.cov) if using weighted likelihood. Use rob.cov for a robust, sandwich estimators fit.wl <- mm(Y~time*Xe+C, dat=odsdat, id=id, lv.formula=~1, t.formula=~1, cond.like=FALSE, samp.probi=odsdat$SampProbi, iter.lim=1000, verbose=FALSE, q=20) summary(fit.wl) ## Imputation based analysis fit.mi <- mi.analysis(FullCohortData=FullCohort.na, Sampled=Sampled, mean.formula=Y~time*Xe+C, lv.formula=~1, t.formula=~1 , id=id, cond.like=TRUE, samp.probs=SampProbs, iter.lim=1000, verbose=TRUE, q=20, M=30, marg.exp.formula=Xe~C, EXPVAR = "Xe") summary(fit.mi)