--
JonathanSchildcrout - 09 Feb 2015
Some functions that I sometimes use when managing longitudinal data
cluster.summary <- function( id, x, fun ){
xlist <- split( x, id )
nj <- unlist( lapply( xlist, length ) )
xj <- unlist( lapply( xlist, fun) )
xsummary <- rep( xj, nj )
xsummary}
## rep the first value of Var for each id
FirstValue <- function(Var, id){
xlist <- split( Var, id )
nj <- unlist( lapply( xlist, length ) )
dup <- duplicated(id)
val <- Var[!dup]
out <- rep(val, nj)
out}
## create lagged value
lagged <- function(x, id){ x.lag <- c(NA, x[-length(x)])
dupid <- duplicated(id)
x.lag[!dupid] <- NA
x.lag}
## "ever taken" function
EverTaken <- function(codevar, codeval, value, id){
tmp <- ifelse(codevar==codeval, value, 0)
Cumul <- c(unlist(tapply(X=tmp, INDEX=id, FUN=cummax)))
Cumul <- as.integer(Cumul>=1)
Cumul
}
## most recent value function
LastValue <- function(codevar, codeval, value, id){
out <- value*(codevar==codeval) + -999*(codevar!=codeval)
for (i in 2:length(out)){
if (id[i] == id[(i-1)] & codevar[i]!=codeval){ out[i] <- out[(i-1)]
}else if ( id[i] != id[(i-1)] & codevar[i]!=codeval ){ out[i] <- -999 }
}
out
}
## Within cluster summary measure
cluster.sum <- function( id, x, summarymeas ){
#
xlist <- split( x, id )
nj <- unlist( lapply( xlist, length ) )
xj <- unlist( lapply( xlist, summarymeas) )
xout <- rep( xj, nj )
xout
}
## Within cluster min time such that the variable named TheVar has a value equal to TheVal
## Note that for this to work, in each cluster, TheVar must equal TheVal at some point
##
cluster.tm.min <- function( id, time, TheVar, TheVal ){
#
nj <- unlist( lapply( split( time, id ), length ) )
xj <- unlist( lapply( split( time[TheVar==TheVal], id[TheVar==TheVal] ), min) )
xout <- rep( xj, nj )
xout
}
## Within cluster first time for a particular CodeVal
cluster.first.time <- function( id, TimeVar, CodeVar, CodeVal ){
#
## create the list of the number of replicates for each
nj <- unlist( lapply( split( TimeVar, id ), length ) )
print("blah")
uniqueid <- unique(id)
uniqueidyescode <- unique(id[CodeVar==CodeVal])
uniqueidnocode <- unique(uniqueid[!(uniqueid %in% uniqueidyescode)])
## augment each vector (id, CodeVar, and X) and give a value of 9999 for the X value when
## when a subject has no entry for the code of interest
idnew <- c(id, uniqueidnocode)
CodeVarnew <- c(CodeVar, rep(CodeVal, length(uniqueidnocode)))
Xnew <- c(TimeVar, rep(99999, length(uniqueidnocode)))
idnew1 <- idnew[CodeVarnew==CodeVal]
CodeVarnew1 <- CodeVarnew[CodeVarnew==CodeVal]
Xnew1 <- Xnew[CodeVarnew==CodeVal]
xj <- unlist( lapply(split( Xnew1, idnew1 ), min))
xout <- rep( xj, nj )
xout
}
## Within cluster summary measure (conditioned)
cluster.sum.cond <- function( id, x, condvar1, condvar2, summarymeas ){
#
xlist <- split( x, id )
nj <- unlist( lapply( xlist, length ) )
xlist <- split( eval( parse(text=paste("x", "[condvar1==condvar2]", sep=""))),
eval( parse(text=paste("id", "[condvar1==condvar2]", sep=""))))
xj <- unlist( lapply( xlist, summarymeas) )
xout <- rep( xj, nj )
xout
}
## The last value of variable VarName, in the time interval determined by a LowTime,
## a HighTime, and the code value upon which we subset, CodeValDeterminingInterval.
## Output the values (Var), the time it occurred (Time), and the id (Case)
LastValueInInterval <- function(Var, TimeVar, LowTime, HighTime, CodeValDeterminingInterval){
##
timeInt <- TimeVar[TimeVar>LowTime & TimeVar<=HighTime & code==CodeValDeterminingInterval]
caseInt <- case[TimeVar>LowTime & TimeVar<=HighTime & code==CodeValDeterminingInterval]
VarInt <- Var[TimeVar>LowTime & TimeVar<=HighTime & code==CodeValDeterminingInterval]
LastTimeInInt <- cluster.sum( id=caseInt, x=timeInt, summarymeas=max )
Var <- VarInt[timeInt==LastTimeInInt]
Time <- timeInt[timeInt==LastTimeInInt]
Case <- caseInt[timeInt==LastTimeInInt]
out <- list(Var=Var, Time=Time, Case=Case)
out
}
## The first value of variable VarName, in the time interval determined by a LowTime,
## a HighTime, and the code value upon which we subset, CodeValDeterminingInterval.
## Output the values (Var), the time it occurred (Time), and the id (Case)
FirstValueInInterval <- function(Var, TimeVar, LowTime, HighTime, CodeValDeterminingInterval){
##
timeInt <- TimeVar[TimeVar>LowTime & TimeVar<=HighTime & code==CodeValDeterminingInterval]
caseInt <- case[TimeVar>LowTime & TimeVar<=HighTime & code==CodeValDeterminingInterval]
VarInt <- Var[TimeVar>LowTime & TimeVar<=HighTime & code==CodeValDeterminingInterval]
FirstTimeInInt <- cluster.sum( id=caseInt, x=timeInt, summarymeas=min )
Var <- VarInt[timeInt==FirstTimeInInt]
Time <- timeInt[timeInt==FirstTimeInInt]
Case <- caseInt[timeInt==FirstTimeInInt]
out <- list(Var=Var, Time=Time, Case=Case)
out
}
## The lowest value of variable VarName, in the time interval determined by a LowTime,
## a HighTime, and the code value upon which we subset, CodeValDeterminingInterval.
## Output the lowest value (LowVal), the time it occurred (LowTime), Number of
## observations upon which the minimum was taken (NumObs), and the id (Case)
NadirInInterval <- function(Var, TimeVar, LowTime, HighTime, CodeValDeterminingInterval){
##
timeInt <- TimeVar[TimeVar>LowTime & TimeVar<=HighTime & code==CodeValDeterminingInterval]
caseInt <- case[TimeVar>LowTime & TimeVar<=HighTime & code==CodeValDeterminingInterval]
VarInt <- Var[TimeVar>LowTime & TimeVar<=HighTime & code==CodeValDeterminingInterval]
LowValtmp <- cluster.sum( id=caseInt, x=VarInt, summarymeas=min )
LowTimetmp <- cluster.sum.cond( id=caseInt, x=timeInt, condvar1=VarInt, condvar2=LowValtmp, summarymeas=min )
Nobstmp <- cluster.sum( id=caseInt, x=VarInt, summarymeas=length )
## pick the first time where the value is at its nadir
LowTime <- timeInt[timeInt==LowTimetmp]
LowVal <- LowValtmp[timeInt==LowTimetmp]
NumObs <- Nobstmp[timeInt==LowTimetmp]
Case <- caseInt[timeInt==LowTimetmp]
out <- list(LowTime=LowTime,
LowVal=LowVal,
NumObs=NumObs,
Case=Case)
out
}
################################################