-- JonathanSchildcrout - 09 Feb 2015

Some functions that I sometimes use when managing longitudinal data

```cluster.summary <- function( id, x, fun ){
xlist <- split( x, id )
nj <- unlist( lapply( xlist, length ) )
xj <- unlist( lapply( xlist, fun) )
xsummary <- rep( xj, nj )
xsummary}
## rep the first value of Var for each id
FirstValue <- function(Var, id){
xlist <- split( Var, id )
nj   <- unlist( lapply( xlist, length ) )
dup  <-  duplicated(id)
val  <- Var[!dup]
out  <- rep(val, nj)
out}

## create lagged value
lagged <- function(x, id){ x.lag <- c(NA, x[-length(x)])
dupid <- duplicated(id)
x.lag[!dupid] <- NA
x.lag}

## "ever taken" function
EverTaken <- function(codevar, codeval, value, id){
tmp     <- ifelse(codevar==codeval, value, 0)
Cumul   <- c(unlist(tapply(X=tmp, INDEX=id, FUN=cummax)))
Cumul   <- as.integer(Cumul>=1)
Cumul
}

## most recent value function
LastValue <- function(codevar, codeval, value, id){

out  <- value*(codevar==codeval) + -999*(codevar!=codeval)
for (i in 2:length(out)){
if (id[i] == id[(i-1)] & codevar[i]!=codeval){ out[i] <- out[(i-1)]
}else if ( id[i] != id[(i-1)] & codevar[i]!=codeval ){ out[i] <- -999 }
}
out
}

## Within cluster summary measure
cluster.sum <- function( id, x, summarymeas ){
#
xlist <- split( x, id )
nj <- unlist( lapply( xlist, length ) )
xj <- unlist( lapply( xlist, summarymeas) )
xout <- rep( xj, nj )
xout
}

## Within cluster min time such that the variable named TheVar has a value equal to TheVal
## Note that for this to work, in each cluster, TheVar must equal TheVal at some point
##
cluster.tm.min <- function( id, time, TheVar, TheVal ){
#
nj   <- unlist( lapply( split( time, id ),                                    length ) )
xj   <- unlist( lapply( split( time[TheVar==TheVal], id[TheVar==TheVal] ), min) )
xout <- rep( xj, nj )
xout
}

## Within cluster first time for a particular CodeVal
cluster.first.time <- function( id, TimeVar, CodeVar, CodeVal ){
#
## create the list of the number of replicates for each
nj          <- unlist( lapply( split( TimeVar, id ), length ) )
print("blah")
uniqueid        <- unique(id)
uniqueidyescode <- unique(id[CodeVar==CodeVal])
uniqueidnocode  <- unique(uniqueid[!(uniqueid %in% uniqueidyescode)])

## augment each vector (id, CodeVar, and X) and give a value of 9999 for the X value when
## when a subject has no entry for the code of interest
idnew       <- c(id, uniqueidnocode)
CodeVarnew  <- c(CodeVar, rep(CodeVal, length(uniqueidnocode)))
Xnew        <- c(TimeVar,       rep(99999, length(uniqueidnocode)))

idnew1       <- idnew[CodeVarnew==CodeVal]
CodeVarnew1  <- CodeVarnew[CodeVarnew==CodeVal]
Xnew1        <- Xnew[CodeVarnew==CodeVal]

xj <- unlist( lapply(split( Xnew1, idnew1 ), min))

xout <- rep( xj, nj )
xout
}

## Within cluster summary measure (conditioned)
cluster.sum.cond <- function( id, x, condvar1, condvar2, summarymeas ){
#
xlist <- split( x, id )
nj    <- unlist( lapply( xlist, length ) )
xlist <- split( eval( parse(text=paste("x",  "[condvar1==condvar2]", sep=""))),
eval( parse(text=paste("id", "[condvar1==condvar2]", sep=""))))
xj    <- unlist( lapply( xlist, summarymeas) )
xout  <- rep( xj, nj )
xout
}

## The last value of variable VarName, in the time interval determined by a LowTime,
## a HighTime, and the code value upon which we subset, CodeValDeterminingInterval.
## Output the values (Var), the time it occurred (Time), and the id (Case)

LastValueInInterval <- function(Var, TimeVar, LowTime, HighTime, CodeValDeterminingInterval){

##
timeInt      <- TimeVar[TimeVar>LowTime & TimeVar<=HighTime & code==CodeValDeterminingInterval]
caseInt      <-    case[TimeVar>LowTime & TimeVar<=HighTime & code==CodeValDeterminingInterval]
VarInt       <-     Var[TimeVar>LowTime & TimeVar<=HighTime & code==CodeValDeterminingInterval]

LastTimeInInt <- cluster.sum( id=caseInt, x=timeInt, summarymeas=max )

Var  <- VarInt[timeInt==LastTimeInInt]
Time <- timeInt[timeInt==LastTimeInInt]
Case <- caseInt[timeInt==LastTimeInInt]

out <- list(Var=Var, Time=Time, Case=Case)
out
}

## The first value of variable VarName, in the time interval determined by a LowTime,
## a HighTime, and the code value upon which we subset, CodeValDeterminingInterval.
## Output the values (Var), the time it occurred (Time), and the id (Case)

FirstValueInInterval <- function(Var, TimeVar, LowTime, HighTime, CodeValDeterminingInterval){

##
timeInt      <- TimeVar[TimeVar>LowTime & TimeVar<=HighTime & code==CodeValDeterminingInterval]
caseInt      <-    case[TimeVar>LowTime & TimeVar<=HighTime & code==CodeValDeterminingInterval]
VarInt       <-     Var[TimeVar>LowTime & TimeVar<=HighTime & code==CodeValDeterminingInterval]

FirstTimeInInt <- cluster.sum( id=caseInt, x=timeInt, summarymeas=min )

Var  <- VarInt[timeInt==FirstTimeInInt]
Time <- timeInt[timeInt==FirstTimeInInt]
Case <- caseInt[timeInt==FirstTimeInInt]

out <- list(Var=Var, Time=Time, Case=Case)
out
}

## The lowest value of variable VarName, in the time interval determined by a LowTime,
## a HighTime, and the code value upon which we subset, CodeValDeterminingInterval.
## Output the lowest value (LowVal), the time it occurred (LowTime), Number of
## observations upon which the minimum was taken (NumObs), and the id (Case)

NadirInInterval <- function(Var, TimeVar, LowTime, HighTime, CodeValDeterminingInterval){

##
timeInt      <- TimeVar[TimeVar>LowTime & TimeVar<=HighTime & code==CodeValDeterminingInterval]
caseInt      <-    case[TimeVar>LowTime & TimeVar<=HighTime & code==CodeValDeterminingInterval]
VarInt       <-     Var[TimeVar>LowTime & TimeVar<=HighTime & code==CodeValDeterminingInterval]

LowValtmp    <- cluster.sum( id=caseInt, x=VarInt, summarymeas=min )
LowTimetmp   <- cluster.sum.cond( id=caseInt, x=timeInt, condvar1=VarInt, condvar2=LowValtmp, summarymeas=min )
Nobstmp      <- cluster.sum( id=caseInt, x=VarInt, summarymeas=length )

## pick the first time where the value is at its nadir
LowTime   <- timeInt[timeInt==LowTimetmp]
LowVal    <- LowValtmp[timeInt==LowTimetmp]
NumObs    <- Nobstmp[timeInt==LowTimetmp]
Case      <- caseInt[timeInt==LowTimetmp]

out <- list(LowTime=LowTime,
LowVal=LowVal,
NumObs=NumObs,
Case=Case)
out
}
################################################```
Topic revision: r3 - 03 Jan 2018, JonathanSchildcrout

• Biostatistics Webs

Copyright © 2013-2022 by the contributing authors. All material on this collaboration platform is the property of the contributing authors.
Ideas, requests, problems regarding Vanderbilt Biostatistics Wiki? Send feedback