Examples of Dealing with Multiple Data Frames Stored in a List

Common application: sasxport.get was used to import multiple SAS datasets into a single S list object

> a <- data.frame(x1=1:3, x2=c('a','b','c'), x3=2:4)
> a <- upData(a, labels=c(x1='Label for x1',x3='Label for x3'),
+             units=c(x1='mmHg', x3='minutes'))

> b <- data.frame(x1=3:5, x4=5:7)
> b <- upData(b, labels=c(x1='Label for x1'), units=c(x1='cm'))

> d <- data.frame(x5=1:3, x6=2:4)
> w <- llist(a,b,d)  # llist in Hmisc remembers argument names

> contents(w)

  Obs Var Var.NA
a   3   3      0
b   3   2      0
d   3   2      0

> for(u in names(w)) print(describe(w[[u]], descript=u))

a 

 3  Variables      3  Observations
---------------------------------------------------------------------------
x1 : Label for x1 [mmHg] 
      n missing  unique    Mean 
      3       0       3       2 

1 (1, 33%), 2 (1, 33%), 3 (1, 33%) 
---------------------------------------------------------------------------
x2 
      n missing  unique 
      3       0       3 

a (1, 33%), b (1, 33%), c (1, 33%) 
---------------------------------------------------------------------------
x3 : Label for x3 [minutes] 
      n missing  unique    Mean 
      3       0       3       3 

2 (1, 33%), 3 (1, 33%), 4 (1, 33%) 
---------------------------------------------------------------------------
b 

 2  Variables      3  Observations
. . .

> n <- unlist(lapply(w, names))
> datadict <- 
+   data.frame(dataset=rep(names(w), sapply(w,length)),
+              variable=n,
+              label=unlist(lapply(w, function(x) sapply(x, label))),
+               units=unlist(lapply(w, function(x) sapply(x, units))),
+            row.names=NULL)
> datadict

  dataset variable        label   units
1       a       x1 Label for x1    mmHg
2       a       x2                     
3       a       x3 Label for x3 minutes
4       b       x1 Label for x1      cm
5       b       x4                     
6       d       x5                     
7       d       x6                     

> ## print in order of variable names
> i <- order(datadict$variable)
> datadict[i,]

  dataset variable        label   units
1       a       x1 Label for x1    mmHg
4       b       x1 Label for x1      cm
2       a       x2                     
3       a       x3 Label for x3 minutes
5       b       x4                     
6       d       x5                     
7       d       x6                     

> ## Check for inconsistencies in labels or units (when non-blank).
> ## atr: name of a column of the global data frame datadict ('label' or
> ## 'units').  For each variable name, counts the distinct non-blank values
> ## of that attribute over all rows of datadict, and prints the names of the
> ## variables having more than one distinct value.  Returns NULL invisibly.
> chka <- function(atr) {
+  ## w: number of distinct non-blank values of atr, indexed by variable name
+  w <- tapply(datadict[[atr]], datadict$variable,
+              function(x) length(unique(x[x != ""])))
+  if(any(w > 1))
+   cat('\nVariables with inconsistent ', atr, ' across datasets:\n', 
+       paste(names(w[w > 1]),collapse=' '),'\n', sep='')
+  invisible()
+ }

> chka('label')
> chka('units')

Variables with inconsistent units across datasets:
x1

-- FrankHarrell - 13 Jun 2004
Edit | Attach | Print version | History: r2 < r1 | Backlinks | View wiki text | Edit WikiText | More topic actions...
Topic revision: r1 - 13 Jun 2004, FrankHarrell
 

This site is powered by Foswiki. Copyright © 2013-2022 by the contributing authors. All material on this collaboration platform is the property of the contributing authors.
Ideas, requests, problems regarding Vanderbilt Biostatistics Wiki? Send feedback