Examples of Dealing with Multiple Data Frames Stored in a List
Common application: sasxport.get was used to import multiple SAS datasets into one S list object
> a <- data.frame(x1=1:3, x2=c('a','b','c'), x3=2:4)
> a <- upData(a, labels=c(x1='Label for x1',x3='Label for x3'),
+ units=c(x1='mmHg', x3='minutes'))
> b <- data.frame(x1=3:5, x4=5:7)
> b <- upData(b, labels=c(x1='Label for x1'), units=c(x1='cm'))
> d <- data.frame(x5=1:3, x6=2:4)
> w <- llist(a,b,d) # llist in Hmisc remembers argument names
> contents(w)
Obs Var Var.NA
a 3 3 0
b 3 2 0
d 3 2 0
> for(u in names(w)) print(describe(w[[u]], descript=u))
a
3 Variables 3 Observations
---------------------------------------------------------------------------
x1 : Label for x1 [mmHg]
n missing unique Mean
3 0 3 2
1 (1, 33%), 2 (1, 33%), 3 (1, 33%)
---------------------------------------------------------------------------
x2
n missing unique
3 0 3
a (1, 33%), b (1, 33%), c (1, 33%)
---------------------------------------------------------------------------
x3 : Label for x3 [minutes]
n missing unique Mean
3 0 3 3
2 (1, 33%), 3 (1, 33%), 4 (1, 33%)
---------------------------------------------------------------------------
b
2 Variables 3 Observations
. . .
> n <- unlist(lapply(w, names))
> datadict <-
+ data.frame(dataset=rep(names(w), sapply(w,length)),
+ variable=n,
+ label=unlist(lapply(w, function(x) sapply(x, label))),
+ units=unlist(lapply(w, function(x) sapply(x, units))),
+ row.names=NULL)
> datadict
dataset variable label units
1 a x1 Label for x1 mmHg
2 a x2
3 a x3 Label for x3 minutes
4 b x1 Label for x1 cm
5 b x4
6 d x5
7 d x6
> ## print in order of variable names
> i <- order(datadict$variable)
> datadict[i,]
dataset variable label units
1 a x1 Label for x1 mmHg
4 b x1 Label for x1 cm
2 a x2
3 a x3 Label for x3 minutes
5 b x4
6 d x5
7 d x6
> ## check for inconsistencies in labels or units (when non-blank)
> chka <- function(atr) {
+ w <- tapply(datadict[[atr]], datadict$variable,
+ function(x) length(unique(x[x != ""])))
+ if(any(w > 1))
+ cat('\nVariables with inconsistent ', atr, ' across datasets:\n',
+ paste(names(w[w > 1]),collapse=' '),'\n', sep='')
+ invisible()
+ }
> chka('label')
> chka('units')
Variables with inconsistent units across datasets:
x1
--
FrankHarrell - 13 Jun 2004