# ---------------------------------------------------------------------------
# Interactive session transcript, part 1.
#
# This appears to be a saved command history rather than a script meant to be
# sourced top to bottom: commands are repeated, help pages are opened, and a
# few commands are expected to fail.  Each command is kept in its original
# order, one per line.  Input that was abandoned mid-expression is preserved
# as a comment so that the file parses.
#
# Data sets `hospital` and `titanic3` are not created here; they are assumed
# to exist in the session already -- TODO confirm where they are loaded.
# ---------------------------------------------------------------------------

# --- Vectors and simple summaries -------------------------------------------
# NOTE(review): dos() is not a base R function; presumably the S-PLUS for
# Windows shell escape -- confirm before running under R.
dos('dir')
Age <- c(6, 5, 4, 8, 10, 5)
Age
mean(Age)
quantile(Age, c(.25,.75))
cor(Age, Age)
1:100
1:20
1:20
1:20
seq(1,20)
resize()

# --- Getting help and inspecting functions ----------------------------------
?cut
help(cut)
args(mean)
names(mean)

# --- Missing values in summaries --------------------------------------------
mean(c(1,2,NA))
mean(c(1,2,NA),na.rm=TRUE)
Age
age2 <- c(Age,NA)
age2
mean                      # typing a function name alone prints the function

# --- Subscripting vectors ---------------------------------------------------
x <- c(3,6,9,10,2.2,NA,NA,6.7)
x[1:3]
x[c(1,2,5)]
x[-(1:3)]
x
is.na(x)
x[!is.na(x)]
x

# --- Lists and data frames --------------------------------------------------
y <- 1:9
list(x=x, y=y)
# x has 8 elements and y has 9 at this point; y is redefined with 8 elements
# immediately afterwards and the call is repeated.
data.frame(x,y)
x
y <- 1:8
data.frame(x,y)
d <- data.frame(x,y)
d
d$x
d$y
d[[1]]
d[,1]
d[1]
d[1:2]
d[1:5,1:2]
d[1:5,]

# --- Loading Hmisc and exploring the search path ----------------------------
library(Hmisc,TRUE)
help(library='Hmisc')
find(cut2)
find(cut)
masked()
search()
masked(2)
find(label)
label

# --- Describing and annotating the hospital data frame ----------------------
desc <- describe(hospital)
page(desc, multi=TRUE)
label(hospital$duration) <- 'Duration of hospital stay'
hospital$sex <- factor(hospital$sex, 1:2, c('male','female'))
label(hospital$temp) <- 'First temperature following admission'
hospital$antib <- factor(hospital$antib, 1:2, c('yes','no'))
hospital$bact <- factor(hospital$bact, 1:2, c('yes','no'))
hospital$bact
names(hospital)
table(hospital$bculture)
hospital$bculture <- factor(hospital$bculture, 1:2, c('yes','no'))
hospital$bact <- NULL     # drop the column created by the assignment above
hospital$service <- factor(hospital$service, 1:2, c('med','surg'))
desc <- describe(hospital)
page(desc, multi=TRUE)
datadensity(hospital)
source('a:/hospital.s')
dos('dir a:')
source('a:/hospital.q')
desc <- describe(hospital)
page(desc, multi=TRUE)

# --- Exploring titanic3: distributions and missingness patterns -------------
datadensity(titanic3)
hist.data.frame(titanic3)
datadensity(titanic3)
ecdf(titanic3)
nac <- naclus(titanic3)
plot(nac)
naplot(nac)
page(describe(titanic3),multi=TRUE)

# --- Tree models for whether age is missing ---------------------------------
library(rpart)
g <- rpart(is.na(age) ~ survived + sex + pclass, data=titanic3)
plot(g)
text(g)
levels(titanic3$pclass)
?rpart
?rpart.control
# Aborted input (incomplete expression, kept for the record):
#   survived + sex + pclass,
g <- rpart(is.na(age) ~ survived + sex + pclass, data=titanic3,
           control=rpart.control(minsplit=8))
plot(g)
text(g)
g <- rpart(is.na(age) ~ survived + sex + pclass, data=titanic3,
           control=rpart.control(minsplit=6,cp=.05))
plot(g)
text(g)
g <- rpart(is.na(age) ~ survived + sex + pclass,
           control=rpart.control(minsplit=6,cp=.001), data=titanic3)
# ---------------------------------------------------------------------------
# Interactive session transcript, part 2.
#
# Commands are kept in their original order, one per line.  Comments that
# were embedded in the command stream sit on their own lines again, so they
# no longer hide the commands that follow them.  Input abandoned
# mid-expression is preserved as a comment so that the file parses.
#
# `g` is the rpart fit assigned just before this point in the file.  The data
# sets `titanic3`, `hospital` and `diabetes` are assumed to exist in the
# session -- TODO confirm where they are loaded.
# ---------------------------------------------------------------------------

# --- Inspect the last tree fit ----------------------------------------------
plot(g)
text(g)
g$cptable

# --- attach(), the search path and masking ----------------------------------
plot(titanic3)
find(pclass)
find(titanic3)
attach(titanic3)
find(pclass)
table(pclass)
table(titanic3$pclass)
search()
masked()
age <- 1:3                # a new `age` is assigned while titanic3 is attached
find(age)
plot(age, survived)
masked()
remove('age')
find(age)
page(search())
edit(names(hospital))

# --- Grouped summaries ------------------------------------------------------
tapply(age,pclass,mean, na.rm=TRUE)
tapply(age,pclass,var,na.rm=TRUE)
tapply(age,pclass,quantile)
tapply(age,pclass,quantile, na.rm=TRUE)
tapply(age, list(sex,pclass), mean, na.rm=TRUE)
by(age, list(pclass=pclass), FUN=describe)
by(age, pclass,FUN=describe)
by(age, list(Passenger.Class=pclass), FUN=describe)
aggregate(titanic3[c('age','parch')], sex, FUN=mean, na.rm=TRUE)
expand.grid(sex=levels(sex), pclass=levels(pclass))

# --- Recoding factor levels -------------------------------------------------
x <- c('cat','dog','cat','giraffe')
x <- factor(x)
levels(x)
?merge.levels
levels(x) <- list(Domestic=c('cat','dog'), Wild='giraffe')
x
levels(x)
# For help do ?merge.levels
levels(x) <- list(Home='Domestic', Other='Wild')
x
levels(x) <- list('Home Creature'='Home')
x

# --- score.binary -----------------------------------------------------------
x <- c(1, 2, 2, 7, 10)
y <- c(0, 0, 0, 3, 20)
z <- score.binary(x>=7, y>=7)
z
levels(z)
z <- score.binary(x>=7, y>=7, x>=7 & y>=7)
z
find(z)

# --- Imputation -------------------------------------------------------------
age <- c(1,2,NA,4)
age.i <- impute(age)
age.i
is.imputed(age.i)
attributes(age.i)
mean(age.i)
impute(age, 50)
impute(age, mean)
?impute
age.i
y <- 4:1
# x still holds the 5-element vector assigned above while y now has 4
# elements.
plot(x, y)
plot(age.i, y)

# First attempt: each pass calls plot(), so the second pass starts a new
# plot.
for(i in 1:2) {
  s <- is.imputed(age.i)
  if(i==1) {
    plot(age.i[!s],y[!s],pch=1)
  } else {
    plot(age.i[s], y[s], pch=2)
  }
}

# Second attempt: the second pass calls points() instead of plot().
for(i in 1:2) {
  s <- is.imputed(age.i)
  if(i==1) {
    plot(age.i[!s],y[!s],pch=1)
  } else {
    points(age.i[s], y[s], pch=2)
  }
}
# subset=!is.imputed(age.i)

# --- Simulation -------------------------------------------------------------
# Estimate var(sample median) from samples
# of size 50 from a log-normal distribution
n <- 50
reps <- 400
# NOTE(review): single() is not available in every S dialect; presumably it
# allocates a single-precision vector of length reps -- confirm.
meds <- single(reps)
set.seed(171)
for(i in 1:reps) {
  x <- exp(rnorm(n))
  meds[i] <- median(x)
}
var(meds)
hist(meds, nclass=40)
hist(meds, nclass=30)
hist(meds, nclass=25)
qqnorm(meds)
qqline(meds)
single(5)
set.seed(1)
for(i in 1:reps) {
  x <- exp(rnorm(n))
  meds[i] <- median(x)
}
var(meds)

# Same simulation without a loop: one row of the matrix per replicate.
set.seed(173)
# Misplaced parenthesis in the next command: nrow= and byrow= end up inside
# the exp() call rather than matrix().  The command after it is the
# corrected form.
x <- matrix(exp(rnorm(n*reps), nrow=reps, byrow=TRUE) )
x <- matrix(exp(rnorm(n*reps)), nrow=reps, byrow=TRUE)
dim(x)
meds <- apply(x, 1, median)
dim(meds)
length(meds)

# --- Resampling with replacement and the bootstrap --------------------------
1:4
sample(1:4, 4, rep=TRUE)
sample(1:4, 4, rep=TRUE)
sample(1:4, 4, rep=TRUE)
sample(1:4, 4, rep=TRUE)
sample(1:4, 4, rep=TRUE)
sample(1:4, 4, rep=TRUE)
sample(1:4, 4, rep=TRUE)
sample(1:4, 4, rep=TRUE)
sample(1:4, 4, rep=TRUE)
sample(1:4, 4, rep=TRUE)
sample(1:4, 4, rep=TRUE)
x <- exp(rnorm(100))
hist(x, nclass=15)
mean(x)
median(x)
b <- bootstrap(x, mean, B=1000)
?bootstrap
summary(b)
plot(b)
?bootstrap
# NOTE(review): `w` is not assigned anywhere in the visible transcript.
page(w, multi=TRUE)
resize()

# --- Empirical CDFs and data density for the hospital data ------------------
ecdf(wbc)
library(Hmisc,TRUE)
ecdf(wbc)
attach(hospital)
ecdf(wbc)
ecdf(wbc,group=sex)
ecdf(hospital)
datadensity(hospital, group=hospital$sex, col=1:2)

# --- Smoothed survival proportion by age (titanic3) -------------------------
?plsmo
search()[1:3]
detach(2)
attach(titanic3)
plsmo(age, survived, datadensity=TRUE)
plsmo(age, survived, group=sex, datadensity=TRUE, col=1:2)
plsmo(age, survived, group=interaction(sex,pclass), datadensity=TRUE, col=1:6)

# --- Exploring the diabetes data --------------------------------------------
page(describe(diabetes))
datadensity(diabetes)
nac <- naclus(diabetes)
plot(nac)
naplot(nac)
names(diabetes)
v <- varclus(~. , data=diabetes[,-1])
plot(v)
# The first call conditions on `sex`; the repeat uses `gender` instead.
bwplot(cut2(age,g=5) ~ glyhb | sex*frame, data=diabetes)
bwplot(cut2(age,g=5) ~ glyhb | gender*frame, data=diabetes)
bwplot(cut2(age,g=4) ~ glyhb | frame, data=diabetes)
bwplot(cut2(age,g=4) ~ glyhb | frame, data=diabetes, panel=panel.bpplot)
?panel.bpplot
bwplot(cut2(age,g=4) ~ glyhb | frame, data=diabetes, panel=panel.bpplot,
       probs=seq(.01,.49,by=.01))

f <- summary(glyhb ~ gender + age + chol + height + weight + hdl,
             data=diabetes, fun=smedian.hilow, conf.int=.5)
# Aborted input (incomplete expressions, kept for the record):
#   f <- summary(glyhb ~ gender + age + chol +
#   gender + age + chol +
f <- summary(glyhb ~ gender + age + chol + height + weight + hdl,
             data=diabetes, fun=function(y) quantile(y,c(.25,.5,.75)))
plot(f)
plot(f, which=1:3, pch=c('.','[',']') )
plot(f, which=1:3, pch=c('[','.',']') )

ecdf(~chol | gender,data=diabetes)
ecdf(~chol,groups=gender,data=diabetes)
ecdf(~chol,groups=gender,data=diabetes, q=c(.25,.5,.75))
# The same call is repeated under three different par(mfrow=) layouts.
ecdf(diabetes, group=diabetes$gender, label.curve=FALSE)
par(mfrow=c(2,3))
ecdf(diabetes, group=diabetes$gender, label.curve=FALSE)
par(mfrow=c(2,2))
ecdf(diabetes, group=diabetes$gender, label.curve=FALSE)
par(mfrow=c(1,1))

# --- summarize() followed by dot plots --------------------------------------
search()[1:4]
detach(2)
attach(diabetes)
masked()
s <- summarize(glyhb, llist(age=cut2(age,g=5), gender, frame),
               median, na.rm=TRUE)
page(s)
options(width=70)
page(s)
# The first call omits data=; the repeat supplies the summarized frame `s`.
dotplot(age~glyhb|gender*frame)
dotplot(age~glyhb| gender*frame, data=s)
Dotplot(age~glyhb|frame, groups=gender, data=s)
Key()
?Dotplot

# --- Simulated monthly data: medians with quantile bands --------------------
set.seed(111)
dfr <- expand.grid(month=1:12, year=c(1997,1998), reps=1:100)
attach(dfr)
y <- abs(month-6.5) + 2*runif(length(month)) + year-1997
s <- summarize(y, llist(month,year), smedian.hilow, conf.int=.5)
xYplot(Cbind(y,Lower,Upper) ~ month, groups=year, data=s,
       keys='lines', method='alt')
s <- summarize(y, llist(month,year), quantile,
               probs=c(.5,.05,.25,.75,.95), type='matrix')
Dotplot(month ~ Cbind(y) | year, data=s)
?Dotplot
?setTrellis
Dotplot(month ~ Cbind(y) | year, data=s, lwd=3)
trellis.par.get           # typing a function name alone prints the function
trellis.par.get()
?trellis.par.get
?xYplot

# --- xYplot with explicit lower/upper limits --------------------------------
dfr <- expand.grid(month=1:12, continent=c('Europe','USA'),
                   sex=c('female','male'))
attach(dfr)
set.seed(13)
# 48 = 12 months x 2 continents x 2 sexes, the number of rows in dfr.
y <- month/10 + 1*(sex=='female') + 2*(continent=='Europe') +
  runif(48,-.15,.15)
lower <- y - runif(48,.05,.15)
upper <- y + runif(48,.05,.15)
xYplot(Cbind(y,lower,upper) ~ month,subset=sex=='male'&continent=='USA')
xYplot(Cbind(y,lower,upper) ~ month|continent,subset=sex=='male')
xYplot(Cbind(y,lower,upper) ~ month|continent,groups=sex)
Key()
xYplot(Cbind(y,lower,upper) ~ month,groups=sex,subset=continent=='Europe')
xYplot(Cbind(y,lower,upper) ~ month,groups=sex,subset=continent=='Europe',
       keys='lines')
# keys='lines' causes labcurve to draw a legend where the panel is most empty
xYplot(Cbind(y,lower,upper) ~ month,groups=sex,subset=continent=='Europe',
       method='bands')
xYplot(Cbind(y,lower,upper) ~ month,groups=sex,subset=continent=='Europe',
       method='upper')
?setps