# Begin Day 1 3+3 12+5 if(x==3) { y <- 2 z <- 3 } x <- cut() getHdata(pbc) options(width=120) contents(pbc) con <- contents(pbc) con print(con, sort='names') ?contents print(con, sort='NAs') find(getHdata) describe(pbc) page(describe(pbc),multi=T) datadensity(pbc) nac <- naclus(pbc) plot(nac) naplot(nac) d <- data.frame(x=1:10,y=11:20) d find(d) find(x) find(y) rm(x,y) attach(d) find(x) find(y) search() x y d$y detach('d') # or detach(2) search() x1 <- 1:4 sex <- rep(c('male','female'),2) x1 sex subset(x1, sex=='male') d <- data.frame(x1=x1, x2=(1:4)/10, x3=11:14, sex=sex) d subset(d, sex=='male') subset(d, sex=='male' & x2>0.2) subset(d, x1>1, select=-x1) subset(d, select=c(x1,sex)) subset(d, x2<0.3, select=c(x2:sex)) subset(d, x2<0.3, -(x3:sex)) attach(subset(d, sex=='male' & x3==11, x1:x3)) search() find(x1) x1 rm(x1) find(x1) x1 detach(2) d d$z <- c(1,3,2,7) d dat <- data.frame(a=(1:3)/7, y=c('a','b1','b2'), z=1:3) dat dat2 <- upData(dat, w=A+z, rename=c(a='A'), drop='z', labels=c(A='Label for A', y='Test Label'), levels=list(y=list(a='a', b=c('b1','b2')))) table(dat2$y) table(dat$y) table(dat$y, dat2$y) dat2 describe(dat2) ?getHdata x <- runif(10) x x <- sort(x) x set.seed(1) x <- runif(5) x i <- order(x) x[i] y <- 1:8 sex <- c(rep('male',4),rep('female',4)) treat <- rep(c('A','B'),4) sex treat tapply(y, sex, mean) tapply(y, treat, mean, na.rm=T) tapply(y, list(sex,treat), mean) by(y, list(sex=sex), FUN=describe) by(y, llist(sex), FUN=describe, descript='y') page(by.data.frame) tapply(y, sex, describe) abbreviate(c('Frank','Josh','Henry')) set.seed(1) x <- runif(1000) k <- cut2(x, g=4) table(k) expand.grid(age=30:34, sex=c('female','male')) x <- c(1,2,NA,4) impute(x) impute(x, median) impute(x, mean) impute(x, 2.1) impute(x, 'random') y <- impute(x) is.imputed(y) attributes(y) describe(y) x <- 1:100 smean.cl.normal(x) smean.cl.boot(x) sample(x[1:5], 5, replace=T) ?bpower bpower.sim(.1, .15, n1=100, n2=100) bsamsize(.1, .15, power=.9) set.seed(1) y <- rnorm(100) x <- runif(100) trt <- factor(sample(c('a','b'),100,T)) state <- factor(sample(c('AL','AK','AZ'),100,T)) table(state) s <- spearman2(y ~ trt + state + x) s plot(s) ?binconf # Begin Day 2 set.seed(113) h <- runif(20) median(h) B <- 500 meds <- single(B) meds for(i in 1:B) meds[i] <- median(sample(h, 20, replace=T)) hist(meds, nclass=10) var(meds) quantile(meds, c(.05,.95)) b <- bootstrap(h, median, B=B) b limits.emp(b) summary(pbc) summary(h) summary data.restore('/tmp/dumpdata.sdd') ls() hospital <- cleanup.import(hospital) page(describe(hospital),multi=T) options('editor') options('pager') options( editor='c:/Program Files/NoteTab Light/NoteTab.exe', pager='c:/Program Files/NoteTab Light/NoteTab.exe') page(ls, multi=T) page(cut2, multi=T) attach(hospital) sex temp s <- summarize(temp, llist(sex,ageg=cut2(age,g=2)), mean, na.rm=T) s traceback() mApply <- mApply # local copy of mApply fix(mApply) # edit local copy of mApply # Fix bug: Edit line # if( !length(dn[[length(dn)]])) { # ^ # after if( add length(dn) && page(describe(hospital),multi=T) x <- 1:10 y <- runif(10) id <- letters[1:10] plot(x,y) identify(x, y, id) z <- locator(1) z text(z, 'HELLO') getHdata(pbc) datadensity(pbc) search() detach(2) attach(pbc) names(pbc) rug(bili) plot(age, bili) rug(bili, side=4) plot(age, bili) scat1d(age) scat1d(bili, side=4) hist(bili) histSpike(bili) histSpike(rnorm(100000)) plot(age, bili) histSpike(bili, side=4, add=T) histogram(~bili) plot(density(bili), type='l') scat1d(bili) densityplot(~bili) hist.data.frame(pbc) attach(pbc) ecdf(bili, q=(1:3)/4, datadensity='density') ecdf(bili, group=drug, q=.5, col=1:3) bwplot(drug~bili) bwplot(drug ~ bili, panel=panel.bpplot) ?panel.bpplot set.seed(13) x <- rnorm(1000) g <- sample(1:6, 1000, replace=T) x[g==1][1:20] <- rnorm(20)+3 # contaminate 20 x's for group 1 # default trellis box plot if(.R.) library(lattice) bwplot(g ~ x) # box-percentile plot with data density (rug plot) bwplot(g ~ x, panel=panel.bpplot, probs=seq(.01,.49,by=.01), datadensity=T) # add ,scat1d.opts=list(tfrac=1) to make all tick marks the same size # when a group has > 125 observations # small dot for means, show only .05,.125,.25,.375,.625,.75,.875,.95 quantiles bwplot(g ~ x, panel=panel.bpplot, cex=.3) # suppress means and reference lines for lower and upper quartiles bwplot(g ~ x, panel=panel.bpplot, probs=c(.025,.1,.25), means=F, qref=F) # continuous plot up until quartiles ("Tootsie Roll plot") bwplot(g ~ x, panel=panel.bpplot, probs=seq(.01,.25,by=.01)) # start at quartiles then make it continuous ("coffin plot") bwplot(g ~ x, panel=panel.bpplot, probs=seq(.25,.49,by=.01)) # same as previous but add a spike to give 0.95 interval bwplot(g ~ x, panel=panel.bpplot, probs=c(.025,seq(.25,.49,by=.01))) # decile plot with reference lines at outer quintiles and median bwplot(g ~ x, panel=panel.bpplot, probs=c(.1,.2,.3,.4), qref=c(.5,.2,.8)) # default plot with tick marks showing all observations outside the outer # box (.05 and .95 quantiles), with very small ticks bwplot(g ~ x, panel=panel.bpplot, nout=.05, scat1d.opts=list(frac=.01)) # show 5 smallest and 5 largest observations bwplot(g ~ x, panel=panel.bpplot, nout=5) # Use a scat1d option (preserve=T) to ensure that the right peak extends # to the same position as the extreme scat1d bwplot(~x , panel=panel.bpplot, probs=seq(.00,.5,by=.001), datadensity=T, scat1d.opt=list(preserve=T)) # Draw a prototype showing how to interpret the plots dev.off() bpplt() # make a local copy of bwplot that always uses panel.bpplot (S-Plus only) # bwplot$panel <- panel.bpplot # bwplot(g ~ x, nout=.05) dev.off() plot(age, bili, ylim=c(0,15), ylab='Bilirubin', main='Hot Stuff') xyplot(bili ~ age) title('First\nSecond') par(mfrow=c(2,2)) hist(bili); ecdf(bili) plot(age, bili) par(mfrow=c(1,1)) show.pch() show.col() plot(1:2, lty=3, type='l') search() detach(2) # Assignment 6 Prob. 2 attach(subset(pbc, drug!='not randomized')) search() histogram(~bili | drug) p1 <- histogram(~bili | drug) p2 <- histogram(~albumin | drug) print(p1, split=c(1,1,1,2), more=T) print(p2, split=c(1,2,1,2)) p1 <- densityplot(~bili | drug) p2 <- densityplot(~albumin | drug) p3 <- histogram(~albumin) dev.off() print(p1, split=c(1,1,1,3), more=T) print(p2, split=c(1,2,1,3), more=T) print(p3, position=c(0,.66,.5,1)) # Doesn't run - S+ error: ecdf(data.frame(bili,albumin, protime), group=drug) bwplot(drug ~ bili) # Day 3 ?symbols select <- c("Atlanta", "Atlantic City", "Bismarck", "Boise", "Dallas", "Denver", "Lincoln", "Los Angeles", "Miami", "Milwaukee", "New York", "Seattle") # Modifications from help file city.x <- city.x city.y <- city.y names(city.x) <- city.name names(city.y) <- city.name city.name <- city.name names(city.name) <- city.name pop <- c(426, 60, 28, 34, 904, 494, 129, 2967, 347, 741, 7072, 557) usa() # plot map of US symbols(city.x[select], city.y[select], circles = sqrt(pop), add = T) text(city.x[select], city.y[select], city.name[select], cex = .6) murder <- state.x77[,"Murder"] west <- state.center$x < -95 therm <- cbind(.4, 1, murder[west]/max(murder[west])) usa(xlim = c(94, 135), ylim = c(25, 52)) symbols(state.center$x[west], state.center$y[west], thermometers = therm, inches = .5, add = T) title(main = "Murder Rates in the Western U.S.") usa() x <- 1:10 y <- runif(10) z <- runif(10) symbols(x, y, circles=z, inches=.2) x <- matrix(rnorm(200*5),ncol=5) pairs(x) splom(~x) ?splom brush(state.x91, hist=T) ?brush x <- runif(1000) y <- rnorm(1000) plot(x,y) histSpike(x, add=T, frac=.1) mtext('Count',side=4, adj=0) x <- 1:10 y <- x^2 ys <- seq(0,100,10) xyplot(sqrt(y) ~ x, type='l', ylab='y', ylim=sqrt(c(0,100)), scales=list(y=list(at=sqrt(ys), labels=format(ys)))) show.settings() ?trellis.par.get trellis.par.get('plot.line') set.seed(111) dfr <- expand.grid(month=1:12, year=c(1997,1998), reps=1:100) attach(dfr) y <- abs(month-6.5)+ 2*runif(length(month))+year-1997 length(month) xYplot(y ~ month, groups=year) s <- summarize(y, llist(month,year), mean, na.rm=T) s xYplot(y ~ month, groups=year, type='b', data=s) s <- summarize(y > 6, llist(month,year), mean, stat.name='ygt6', na.rm=T) s xYplot(ygt6 ~ month | factor(year), type='b', data=s) search() detach(2) # Assignment 7 Prob. 1 getHdata(diabetes) attach(diabetes) s <- summarize(glyhb, llist(frame,gender), median, na.rm=T) Dotplot(frame ~ glyhb | gender, data=s, cex=1.5) s <- summarize(glyhb, llist(frame,Age=cut2(age,g=4)), median, na.rm=T) Dotplot(frame ~ glyhb | Age, data=s, cex=1.5, as.table=T) datadensity(diabetes) # Problem 3 symbols(age, weight, circles=chol, inches=.15) s <- summarize(glyhb, llist(frame,Age=cut2(age,g=4),gender), median, na.rm=T) s Dotplot(frame ~ glyhb | Age, data=s, groups=gender, cex=1.5) # encountered bug due to groups= # Problem 5 search() detach(2) getHdata(olympics.1996) attach(olympics.1996) contents(olympics.1996) plot(log(population), medals) plot(log(population), medals/population) plsmo(log(population), medals/population, datadensity=T, add=T) text(log(population),medals/population, country) length(y) detach(2) attach(dfr) s <- summarize(y, llist(month,year), smedian.hilow, conf.int=.5) xYplot(Cbind(y,Lower,Upper) ~ month, groups=year, data=s, keys='lines', method='alt', type='b') s <- summarize(y, llist(month,year), smean.cl.boot) s xYplot(Cbind(y,Lower,Upper) ~ month | factor(year), type='l', data=s) # xYplot(y ~..., nx=F, method=smean.cl.boot) xYplot(Cbind(y,Lower,Upper) ~ month | factor(year), method='filled bands', type='l', data=s) s <- summarize(y, llist(month,year), quantile, probs=c(.5,.1,.25,.75,.9), type='matrix') xYplot(Cbind(y) ~ month | factor(year), data=s, type='l', method='bands', lwd.bands=c(1,3,3,1), lwd=5) set.seed(1) age <- rnorm(1000, 30, 10) sbp <- 0.3*(age-30) + rnorm(1000, 120, 15) xYplot(sbp ~ age, method='quantile', xlim=c(5,60), ylim=c(100,140), nx=80) xYplot(sbp ~ age, method=smean.cl.normal, xlim=c(5,60), ylim=c(100,140), nx=60) d <- expand.grid(continent=c('USA','Europe'), year=1999:2001) d d$proportion <- c(.2,.18,.19,.22,.23,.20) d$SE <- c(.02,.01,.02,.015,.021,.025) d Dotplot(year ~ Cbind(proportion, proportion-1.96*SE, proportion+1.96*SE) | continent, data=d, ylab='Year') yr <- factor(d$year) yr <- reorder.factor(yr, d$proportion, mean) levels(yr) search() detach(2) getHdata(titanic3) page(describe(titanic3),multi=T) datadensity(titanic3) attach(titanic3) rm(age) plsmo(age, survived, datadensity=T) bwplot(pclass ~ age | sex) s <- summary(survived ~ age + sex + pclass) plot(s,cex=1.5) plsmo(age, survived, group=interaction(pclass,sex), col=1:6, datadensity=T) # Assignment 8 search() detach(2) # Problem 1 datadensity(diabetes) # plot(diabetes) hist.data.frame(diabetes) ecdf(diabetes, group=diabetes$gender, label.curve=F) # Problem 2 nac <- naclus(diabetes) plot(nac) naplot(nac) # Problem 3 plot(varclus(~., data=diabetes)) # Problem 4 bwplot(cut2(age,g=5) ~ glyhb | gender*frame, panel=panel.bpplot, data=diabetes) # Problem 5 attach(diabetes) s <- summary(glyhb ~ age + gender + height + weight + chol, fun=quantile) plot(s, which=2:4, pch=c(91,16,93), xlab=label(glyhb)) # Shows error bars using # plot.summary.formula.response # Instead do cross-classified analysis s <- summarize(glyhb, llist(age=cut2(age,g=3),gender, weight=cut2(weight,g=2)), smedian.hilow, conf.int=.5) Dotplot(age~Cbind(glyhb,Lower,Upper)| gender*weight, data=s) xYplot(glyhb ~ age | gender*cut2(weight,g=2), method='quantile', data=diabetes)