-- JonathanSchildcrout - 09 Feb 2015

Some functions that I sometimes use when managing longitudinal data

cluster.summary <- function( id, x, fun ){
    xlist <- split( x, id )
    nj <- unlist( lapply( xlist, length ) )
    xj <- unlist( lapply( xlist, fun) )
    xsummary <- rep( xj, nj )
    xsummary}
## rep the first value of Var for each id
FirstValue <- function(Var, id){  
    xlist <- split( Var, id )
    nj   <- unlist( lapply( xlist, length ) )
    dup  <-  duplicated(id)
    val  <- Var[!dup]
    out  <- rep(val, nj)
    out}

## create lagged value
lagged <- function(x, id){ x.lag <- c(NA, x[-length(x)])
                           dupid <- duplicated(id)
                           x.lag[!dupid] <- NA
                           x.lag}

## "ever taken" function
EverTaken <- function(codevar, codeval, value, id){
  tmp     <- ifelse(codevar==codeval, value, 0)
  Cumul   <- c(unlist(tapply(X=tmp, INDEX=id, FUN=cummax)))
  Cumul   <- as.integer(Cumul>=1)
  Cumul
}

## most recent value function
LastValue <- function(codevar, codeval, value, id){

  out  <- value*(codevar==codeval) + -999*(codevar!=codeval)
  for (i in 2:length(out)){
    if (id[i] == id[(i-1)] & codevar[i]!=codeval){ out[i] <- out[(i-1)]
    }else if ( id[i] != id[(i-1)] & codevar[i]!=codeval ){ out[i] <- -999 }
  }
  out
}

## Within cluster summary measure
cluster.sum <- function( id, x, summarymeas ){
#
  xlist <- split( x, id )
  nj <- unlist( lapply( xlist, length ) )
  xj <- unlist( lapply( xlist, summarymeas) )
  xout <- rep( xj, nj )
  xout
}

## Within cluster min time such that the variable named TheVar has a value equal to TheVal
## Note that for this to work, in each cluster, TheVar must equal TheVal at some point 
##  
cluster.tm.min <- function( id, time, TheVar, TheVal ){
#
  nj   <- unlist( lapply( split( time, id ),                                    length ) )
  xj   <- unlist( lapply( split( time[TheVar==TheVal], id[TheVar==TheVal] ), min) )
  xout <- rep( xj, nj )
  xout
}

## Within cluster first time for a particular CodeVal
cluster.first.time <- function( id, TimeVar, CodeVar, CodeVal ){
#
  ## create the list of the number of replicates for each  
  nj          <- unlist( lapply( split( TimeVar, id ), length ) )
  print("blah")
  uniqueid        <- unique(id)
  uniqueidyescode <- unique(id[CodeVar==CodeVal])
  uniqueidnocode  <- unique(uniqueid[!(uniqueid %in% uniqueidyescode)])
  
  ## augment each vector (id, CodeVar, and X) and give a value of 9999 for the X value when 
  ## when a subject has no entry for the code of interest 
  idnew       <- c(id, uniqueidnocode)
  CodeVarnew  <- c(CodeVar, rep(CodeVal, length(uniqueidnocode)))
  Xnew        <- c(TimeVar,       rep(99999, length(uniqueidnocode)))
  
  idnew1       <- idnew[CodeVarnew==CodeVal]
  CodeVarnew1  <- CodeVarnew[CodeVarnew==CodeVal]
  Xnew1        <- Xnew[CodeVarnew==CodeVal]

 
  xj <- unlist( lapply(split( Xnew1, idnew1 ), min))
  
  xout <- rep( xj, nj )
  xout
}

## Within cluster summary measure (conditioned)
cluster.sum.cond <- function( id, x, condvar1, condvar2, summarymeas ){
#
  xlist <- split( x, id )
  nj    <- unlist( lapply( xlist, length ) )
  xlist <- split( eval( parse(text=paste("x",  "[condvar1==condvar2]", sep=""))), 
                  eval( parse(text=paste("id", "[condvar1==condvar2]", sep=""))))
  xj    <- unlist( lapply( xlist, summarymeas) )
  xout  <- rep( xj, nj )
  xout
}

## The last value of variable VarName, in the time interval determined by a LowTime,
## a HighTime, and the code value upon which we subset, CodeValDeterminingInterval.
## Output the values (Var), the time it occurred (Time), and the id (Case)

LastValueInInterval <- function(Var, TimeVar, LowTime, HighTime, CodeValDeterminingInterval){  

  ##
  timeInt      <- TimeVar[TimeVar>LowTime & TimeVar<=HighTime & code==CodeValDeterminingInterval]
  caseInt      <-    case[TimeVar>LowTime & TimeVar<=HighTime & code==CodeValDeterminingInterval]
  VarInt       <-     Var[TimeVar>LowTime & TimeVar<=HighTime & code==CodeValDeterminingInterval]
  
  LastTimeInInt <- cluster.sum( id=caseInt, x=timeInt, summarymeas=max )
  
  Var  <- VarInt[timeInt==LastTimeInInt]
  Time <- timeInt[timeInt==LastTimeInInt]
  Case <- caseInt[timeInt==LastTimeInInt]
  
  out <- list(Var=Var, Time=Time, Case=Case)
  out
}

## The first value of variable VarName, in the time interval determined by a LowTime,
## a HighTime, and the code value upon which we subset, CodeValDeterminingInterval.
## Output the values (Var), the time it occurred (Time), and the id (Case)

FirstValueInInterval <- function(Var, TimeVar, LowTime, HighTime, CodeValDeterminingInterval){  

  ##
  timeInt      <- TimeVar[TimeVar>LowTime & TimeVar<=HighTime & code==CodeValDeterminingInterval]
  caseInt      <-    case[TimeVar>LowTime & TimeVar<=HighTime & code==CodeValDeterminingInterval]
  VarInt       <-     Var[TimeVar>LowTime & TimeVar<=HighTime & code==CodeValDeterminingInterval]
  
  FirstTimeInInt <- cluster.sum( id=caseInt, x=timeInt, summarymeas=min )
  
  Var  <- VarInt[timeInt==FirstTimeInInt]
  Time <- timeInt[timeInt==FirstTimeInInt]
  Case <- caseInt[timeInt==FirstTimeInInt]
  
  out <- list(Var=Var, Time=Time, Case=Case)
  out
}

## The lowest value of variable VarName, in the time interval determined by a LowTime,
## a HighTime, and the code value upon which we subset, CodeValDeterminingInterval.
## Output the lowest value (LowVal), the time it occurred (LowTime), Number of 
## observations upon which the minimum was taken (NumObs), and the id (Case)

NadirInInterval <- function(Var, TimeVar, LowTime, HighTime, CodeValDeterminingInterval){  

  ##
  timeInt      <- TimeVar[TimeVar>LowTime & TimeVar<=HighTime & code==CodeValDeterminingInterval]
  caseInt      <-    case[TimeVar>LowTime & TimeVar<=HighTime & code==CodeValDeterminingInterval]
  VarInt       <-     Var[TimeVar>LowTime & TimeVar<=HighTime & code==CodeValDeterminingInterval]
  
  LowValtmp    <- cluster.sum( id=caseInt, x=VarInt, summarymeas=min )
  LowTimetmp   <- cluster.sum.cond( id=caseInt, x=timeInt, condvar1=VarInt, condvar2=LowValtmp, summarymeas=min )
  Nobstmp      <- cluster.sum( id=caseInt, x=VarInt, summarymeas=length )
  
  ## pick the first time where the value is at its nadir
  LowTime   <- timeInt[timeInt==LowTimetmp]
  LowVal    <- LowValtmp[timeInt==LowTimetmp]
  NumObs    <- Nobstmp[timeInt==LowTimetmp]
  Case      <- caseInt[timeInt==LowTimetmp]
  
  out <- list(LowTime=LowTime, 
              LowVal=LowVal, 
              NumObs=NumObs, 
              Case=Case)
  out
}
################################################
Topic revision: r3 - 03 Jan 2018, JonathanSchildcrout
 

This site is powered by FoswikiCopyright © 2013-2022 by the contributing authors. All material on this collaboration platform is the property of the contributing authors.
Ideas, requests, problems regarding Vanderbilt Biostatistics Wiki? Send feedback