Using the Hmisc package's summary.formula() function

The following is an illustration of the many uses of the Hmisc package's summary.formula() function. The sample dummy file samplefile.txt is used as the data source.

library(Hmisc)
library(Design)

# Load the sample data; header = TRUE takes the column names from the
# first line of the file.
x <- read.table("samplefile.txt", header = TRUE)

# Attach descriptive labels, factor levels, and measurement units to the
# columns in a single upData() call.
x <- upData(
  x,
  labels = c(
    age = "Age",
    race = "Race",
    sex = "Sex",
    weight = "Weight",
    visits = "No. of Visits",
    tx = "Treatment"
  ),
  levels = list(
    sex = c("Female", "Male"),
    race = c("Black", "Caucasian", "Other"),
    tx = c("Drug X", "Placebo")
  ),
  units = c(age = "years", weight = "lbs.")
)

# Display the variable metadata of the updated data frame.
contents(x)

# (1) Overall summary of every variable in x.
# --> method = "response" (the default; not supplied here)
# NOTE(review): with a one-sided formula Hmisc may treat this call as
# method = "reverse" internally -- confirm against the Hmisc documentation.
summary(~ ., data = x)

# (2) Summary of every variable, broken down by sex.
# --> method = "reverse"
summary(sex ~ ., data = x, method = "reverse")
# Same call with overall = TRUE added.
summary(sex ~ ., data = x, method = "reverse", overall = TRUE)

# (3) Summary of every variable broken down by sex, testing for
# differences across sex.
# --> method = "reverse"
# --> test = TRUE (only available with method = "reverse")
summary(sex ~ ., data = x, method = "reverse", test = TRUE)

# (4) Cross-classification and marginal statistics.
# --> method = "cross"
with(x, summary(age ~ sex, method = "cross"))         # defaults to mean
with(x, summary(age ~ sex + race, method = "cross"))  # defaults to mean

# --> fun = ... selects the statistic computed in each cell
with(x, summary(age ~ sex, method = "cross", fun = sd))

# A factor response such as race is expected to fail here (the response
# must be a matrix / numeric). The call is wrapped in try() so the error is
# still reported but does not abort the rest of the script when the file is
# run with source() or Rscript.
try(with(x, summary(race ~ sex, method = "cross", fun = table)))

# Workaround: unclass() the factors so they are passed as their underlying
# integer codes.
with(x, summary(unclass(race) ~ unclass(sex), method = "cross", fun = table))

# --> fun = ... summary functions that ship with Hmisc, each applied to
# weight within every race group
with(x, summary(weight ~ race, method = "cross", fun = smean.sd))
with(x, summary(weight ~ race, method = "cross", fun = smean.cl.boot))
with(x, summary(weight ~ race, method = "cross", fun = smean.cl.normal))
with(x, summary(weight ~ race, method = "cross", fun = smean.sdl))
with(x, summary(weight ~ race, method = "cross", fun = smedian.hilow))

# --> fun = ... (user-defined functions)
# Column-wise quartiles: for each column of the matrix y, return the
# 25th, 50th and 75th percentiles.
g <- function(y) {
  apply(y, 2, quantile, probs = c(0.25, 0.5, 0.75))
}
# Apply the user-defined statistic g to age within each sex, then within
# each sex-by-race cell.
with(x, summary(age ~ sex, method = "cross", fun = g))
with(x, summary(age ~ sex + race, method = "cross", fun = g))

# Mean, standard deviation, minimum and maximum of a numeric vector.
#
# Arguments:
#   x      vector to summarise.
#   na.rm  if TRUE (default), missing values are dropped before computing.
#
# Returns a named vector with elements Mean, SD, Min and Max. When no
# values are left to summarise, all four elements are NA.
smean.sd.range <- function(x, na.rm = TRUE) {
  vals <- if (na.rm) x[!is.na(x)] else x
  count <- length(vals)

  # Nothing to summarise: return an all-NA result of the same shape.
  if (count == 0) {
    return(c(Mean = NA, SD = NA, Min = NA, Max = NA))
  }

  centre <- sum(vals) / count
  # Sample standard deviation (n - 1 denominator).
  spread <- sqrt(sum((vals - centre)^2) / (count - 1))

  c(Mean = centre, SD = spread, Min = min(vals), Max = max(vals))
}
# Mean, SD, minimum and maximum of age within each race group.
with(x, summary(age ~ race, method = "cross", fun = smean.sd.range))

# For a multi-column response, apply smean.sd.range to every column.
f <- function(y) {
  apply(y, 2, smean.sd.range)
}

# Bind age and weight side by side and use the result as the response.
y <- with(x, cbind(age, weight))
with(x, summary(y ~ race, fun = f, method = "cross"))
   
# --------------------------------------------------------------------
# Use with the sink function to write summary tables
# to an output file
# --------------------------------------------------------------------

# Divert printed output to output.txt. Each table is print()ed explicitly
# because auto-printing does not happen inside a braced expression (nor when
# the script is run through source()). The `finally` clause restores console
# output even if one of the summaries fails, so the sink is never left open.
sink("output.txt")
invisible(tryCatch(
  {
    print(summary(~ ., data = x))
    print(summary(sex ~ ., data = x, method = "reverse", test = TRUE))
    print(with(x, summary(age ~ race, method = "cross",
                          fun = smean.sd.range)))
  },
  finally = sink()
))
Topic revision: r2 - 15 Nov 2006, TheresaScott
 

This site is powered by Foswiki. Copyright © 2013-2022 by the contributing authors. All material on this collaboration platform is the property of the contributing authors.
Ideas, requests, problems regarding Vanderbilt Biostatistics Wiki? Send feedback