# Begin Day 1

3+3
12+5
if(x==3) {
	y <- 2
	z <- 3
}

x <- cut()

getHdata(pbc)
options(width=120)
contents(pbc)
con <- contents(pbc)
con
print(con, sort='names')
?contents
print(con, sort='NAs')
find(getHdata)
describe(pbc)
page(describe(pbc),multi=T)
datadensity(pbc)

nac <- naclus(pbc)
plot(nac)
naplot(nac)

d <- data.frame(x=1:10,y=11:20)
d
find(d)
find(x)
find(y)
rm(x,y)
attach(d)
find(x)
find(y)
search()
x
y
d$y
detach('d') # or detach(2)
search()

x1 <- 1:4
sex <- rep(c('male','female'),2)
x1
sex
subset(x1, sex=='male')
d <- data.frame(x1=x1,
 x2=(1:4)/10, x3=11:14,
 sex=sex)
d
subset(d, sex=='male')
subset(d, sex=='male' & x2>0.2)
subset(d, x1>1, select=-x1)
subset(d, select=c(x1,sex))
subset(d, x2<0.3, select=c(x2:sex))
subset(d, x2<0.3, -(x3:sex))
attach(subset(d, sex=='male' &
 x3==11, x1:x3))
search()
find(x1)
x1
rm(x1)
find(x1)
x1
detach(2)
d
d$z <- c(1,3,2,7)
d
dat <- data.frame(a=(1:3)/7,
 y=c('a','b1','b2'),
 z=1:3)
dat
dat2 <- upData(dat,
 w=A+z, rename=c(a='A'),
 drop='z',
 labels=c(A='Label for A',
          y='Test Label'),
 levels=list(y=list(a='a',
           b=c('b1','b2'))))
table(dat2$y)
table(dat$y)
table(dat$y, dat2$y)
dat2
describe(dat2)
?getHdata

x <- runif(10)
x
x <- sort(x)
x
set.seed(1)
x <- runif(5)
x
i <- order(x)
x[i]

y <- 1:8
sex <- c(rep('male',4),rep('female',4))
treat <- rep(c('A','B'),4)
sex
treat
tapply(y, sex, mean)
tapply(y, treat, mean, na.rm=T)
tapply(y, list(sex,treat), mean)

by(y, list(sex=sex), FUN=describe)
by(y, llist(sex), FUN=describe, descript='y')
page(by.data.frame)
tapply(y, sex, describe)

abbreviate(c('Frank','Josh','Henry'))
set.seed(1)
x <- runif(1000)
k <- cut2(x, g=4)
table(k)

expand.grid(age=30:34, sex=c('female','male'))

x <- c(1,2,NA,4)
impute(x)
impute(x, median)
impute(x, mean)
impute(x, 2.1)
impute(x, 'random')
y <- impute(x)
is.imputed(y)
attributes(y)
describe(y)
x <- 1:100
smean.cl.normal(x)
smean.cl.boot(x)

sample(x[1:5], 5, replace=T)

?bpower

bpower.sim(.1, .15, n1=100, n2=100)
bsamsize(.1, .15, power=.9)

set.seed(1)
y <- rnorm(100)
x <- runif(100)
trt <- factor(sample(c('a','b'),100,T))
state <- factor(sample(c('AL','AK','AZ'),100,T))
table(state)
s <- spearman2(y ~ trt + state + x)
s
plot(s)
?binconf

# Begin Day 2

set.seed(113)
h <- runif(20)
median(h)
B <- 500
meds <- single(B)
meds
for(i in 1:B)
	meds[i] <- median(sample(h, 20, replace=T))
hist(meds, nclass=10)
var(meds)
quantile(meds, c(.05,.95))

b <- bootstrap(h, median, B=B)
b
limits.emp(b)

summary(pbc)
summary(h)
summary

data.restore('/tmp/dumpdata.sdd')
ls()
hospital <- cleanup.import(hospital)
page(describe(hospital),multi=T)
options('editor')
options('pager')
options(
editor='c:/Program Files/NoteTab Light/NoteTab.exe',
pager='c:/Program Files/NoteTab Light/NoteTab.exe')
page(ls, multi=T)
page(cut2, multi=T)

attach(hospital)
sex
temp
s <- summarize(temp, 
               llist(sex,ageg=cut2(age,g=2)), 
               mean, na.rm=T)
s
traceback()
mApply <- mApply # local copy of mApply
fix(mApply)   # edit local copy of mApply
# Fix bug: Edit line 	
# if( !length(dn[[length(dn)]])) {
#    ^
# after      if(      add  length(dn) &&

page(describe(hospital),multi=T)

x <- 1:10
y <- runif(10)
id <- letters[1:10]
plot(x,y)
identify(x, y, id)
z <- locator(1)
z
text(z, 'HELLO')

getHdata(pbc)
datadensity(pbc)
search()
detach(2)
attach(pbc)
names(pbc)
rug(bili)
plot(age, bili)
rug(bili, side=4)
plot(age, bili)
scat1d(age)
scat1d(bili, side=4)
hist(bili)
histSpike(bili)
histSpike(rnorm(100000))
plot(age, bili)
histSpike(bili, side=4, add=T)
histogram(~bili)

plot(density(bili), type='l')
scat1d(bili)
densityplot(~bili)
hist.data.frame(pbc)

attach(pbc)
ecdf(bili, q=(1:3)/4, 
 datadensity='density')
ecdf(bili, group=drug, q=.5,
     col=1:3)

bwplot(drug~bili)
bwplot(drug ~ bili, panel=panel.bpplot)
?panel.bpplot
set.seed(13) 

x <- rnorm(1000) 

g <- sample(1:6, 1000, replace=T) 

x[g==1][1:20] <- rnorm(20)+3   # contaminate 20 x's for group 1 

 

# default trellis box plot 

if(.R.) library(lattice) 

bwplot(g ~ x) 

 

# box-percentile plot with data density (rug plot) 

bwplot(g ~ x, panel=panel.bpplot, probs=seq(.01,.49,by=.01), datadensity=T) 

# add ,scat1d.opts=list(tfrac=1) to make all tick marks the same size 

# when a group has > 125 observations 

 

# small dot for means, show only .05,.125,.25,.375,.625,.75,.875,.95 quantiles 

bwplot(g ~ x, panel=panel.bpplot, cex=.3) 

 

# suppress means and reference lines for lower and upper quartiles 

bwplot(g ~ x, panel=panel.bpplot, probs=c(.025,.1,.25), means=F, qref=F) 

 

# continuous plot up until quartiles ("Tootsie Roll plot") 

bwplot(g ~ x, panel=panel.bpplot, probs=seq(.01,.25,by=.01)) 

 

# start at quartiles then make it continuous ("coffin plot") 

bwplot(g ~ x, panel=panel.bpplot, probs=seq(.25,.49,by=.01)) 

 

# same as previous but add a spike to give 0.95 interval 

bwplot(g ~ x, panel=panel.bpplot, probs=c(.025,seq(.25,.49,by=.01))) 

 

# decile plot with reference lines at outer quintiles and median 

bwplot(g ~ x, panel=panel.bpplot, probs=c(.1,.2,.3,.4), qref=c(.5,.2,.8)) 

 

# default plot with tick marks showing all observations outside the outer 

# box (.05 and .95 quantiles), with very small ticks 

bwplot(g ~ x, panel=panel.bpplot, nout=.05, scat1d.opts=list(frac=.01)) 

 

# show 5 smallest and 5 largest observations 

bwplot(g ~ x, panel=panel.bpplot, nout=5) 

 

# Use a scat1d option (preserve=T) to ensure that the right peak extends  

# to the same position as the extreme scat1d 

bwplot(~x , panel=panel.bpplot, probs=seq(.00,.5,by=.001),  

       datadensity=T, scat1d.opt=list(preserve=T)) 

 

# Draw a prototype showing how to interpret the plots 
dev.off()
bpplt() 

 

# make a local copy of bwplot that always uses panel.bpplot (S-Plus only) 

# bwplot$panel <- panel.bpplot 

# bwplot(g ~ x, nout=.05) 
dev.off()
plot(age, bili, ylim=c(0,15),
   ylab='Bilirubin',
   main='Hot Stuff')
xyplot(bili ~ age)
title('First\nSecond')
par(mfrow=c(2,2))
hist(bili); ecdf(bili)
plot(age, bili)

par(mfrow=c(1,1))
show.pch()
show.col()
plot(1:2, lty=3, type='l')

search()
detach(2)

# Assignment 6 Prob. 2
attach(subset(pbc,
 drug!='not randomized'))
search()
histogram(~bili | drug)

p1 <- histogram(~bili | drug)
p2 <- histogram(~albumin | drug)
print(p1, split=c(1,1,1,2),
      more=T)
print(p2, split=c(1,2,1,2))

p1 <- densityplot(~bili | drug)
p2 <- densityplot(~albumin | drug)
p3 <- histogram(~albumin)
dev.off()
print(p1, split=c(1,1,1,3),
      more=T)
print(p2, split=c(1,2,1,3), more=T)
print(p3, position=c(0,.66,.5,1))

# Doesn't run - S+ error:
ecdf(data.frame(bili,albumin,
   protime), group=drug)
bwplot(drug ~ bili)

# Day 3

?symbols
select <- c("Atlanta", "Atlantic City", "Bismarck", "Boise", "Dallas", 
            "Denver", "Lincoln", "Los Angeles", "Miami", "Milwaukee", 
            "New York", "Seattle")
# Modifications from help file
city.x <- city.x
city.y <- city.y 
names(city.x) <- city.name
names(city.y) <- city.name
city.name <- city.name 
names(city.name) <- city.name 
pop <- c(426, 60, 28, 34, 904, 494, 129, 2967, 347, 741, 7072, 557) 
usa() # plot map of US 
symbols(city.x[select], city.y[select], circles = sqrt(pop), add = T) 
text(city.x[select], city.y[select], city.name[select], cex = .6) 
murder <- state.x77[,"Murder"] 
west <- state.center$x < -95 
therm <- cbind(.4, 1, murder[west]/max(murder[west])) 
usa(xlim = c(94, 135), ylim = c(25, 52)) 
symbols(state.center$x[west], state.center$y[west],  
        thermometers = therm, inches = .5, add = T) 
title(main = "Murder Rates in the Western U.S.") 
usa()
x <- 1:10
y <- runif(10)
z <- runif(10)
symbols(x, y, circles=z, inches=.2)
x <- matrix(rnorm(200*5),ncol=5)
pairs(x)
splom(~x)
?splom
brush(state.x91, hist=T)
?brush

x <- runif(1000)
y <- rnorm(1000)
plot(x,y)
histSpike(x, add=T, frac=.1)
mtext('Count',side=4, adj=0)

x <- 1:10
y <- x^2
ys <- seq(0,100,10)
xyplot(sqrt(y) ~ x, type='l',
 ylab='y', ylim=sqrt(c(0,100)),
 scales=list(y=list(at=sqrt(ys),
  labels=format(ys))))

show.settings()
?trellis.par.get
trellis.par.get('plot.line')

set.seed(111)
dfr <- expand.grid(month=1:12,
 year=c(1997,1998),
 reps=1:100)
attach(dfr)
y <- abs(month-6.5)+
 2*runif(length(month))+year-1997
length(month)
xYplot(y ~ month, groups=year)
s <- summarize(y, llist(month,year),
      mean, na.rm=T)
s
xYplot(y ~ month, groups=year, 
 type='b', data=s)

s <- summarize(y > 6, llist(month,year),
 mean, stat.name='ygt6', na.rm=T)
s
xYplot(ygt6 ~ month | factor(year), 
  type='b', data=s)
search()
detach(2)

# Assignment 7 Prob. 1
getHdata(diabetes)
attach(diabetes)
s <- summarize(glyhb,
 llist(frame,gender), median,
 na.rm=T)
Dotplot(frame ~ glyhb | gender,
 data=s, cex=1.5)
s <- summarize(glyhb,
 llist(frame,Age=cut2(age,g=4)),
 median, na.rm=T)
Dotplot(frame ~ glyhb | Age,
 data=s, cex=1.5, as.table=T)
datadensity(diabetes)

# Problem 3
symbols(age, weight, circles=chol,
 inches=.15)

s <- summarize(glyhb, 
 llist(frame,Age=cut2(age,g=4),gender),
 median, na.rm=T)
s
Dotplot(frame ~ glyhb | Age,
 data=s, groups=gender, cex=1.5)
# encountered bug due to groups=

# Problem 5
search()
detach(2)
getHdata(olympics.1996)
attach(olympics.1996)
contents(olympics.1996)
plot(log(population), medals)
plot(log(population), medals/population)
plsmo(log(population),
 medals/population, datadensity=T,
 add=T)
text(log(population),medals/population,
  country)

length(y)
detach(2)
attach(dfr)
s <- summarize(y, llist(month,year),
 smedian.hilow, conf.int=.5)
xYplot(Cbind(y,Lower,Upper) ~
 month, groups=year, data=s,
 keys='lines', method='alt', type='b')

s <- summarize(y, llist(month,year),
 smean.cl.boot)
s
xYplot(Cbind(y,Lower,Upper) ~
 month | factor(year), type='l',
 data=s)
# xYplot(y ~..., nx=F, 
 method=smean.cl.boot)
xYplot(Cbind(y,Lower,Upper) ~
 month | factor(year),
 method='filled bands', type='l',
 data=s)

s <- summarize(y, llist(month,year),
 quantile, probs=c(.5,.1,.25,.75,.9),
 type='matrix')
xYplot(Cbind(y) ~ month | factor(year),
 data=s, type='l', method='bands',
 lwd.bands=c(1,3,3,1), lwd=5)

set.seed(1)
age <- rnorm(1000, 30, 10)
sbp <- 0.3*(age-30) + rnorm(1000,
 120, 15)
xYplot(sbp ~ age, method='quantile',
 xlim=c(5,60), ylim=c(100,140), nx=80)

xYplot(sbp ~ age, method=smean.cl.normal,
 xlim=c(5,60), ylim=c(100,140),
 nx=60)

d <- expand.grid(continent=c('USA','Europe'),
 year=1999:2001)
d
d$proportion <- c(.2,.18,.19,.22,.23,.20)
d$SE <- c(.02,.01,.02,.015,.021,.025)
d
Dotplot(year ~ Cbind(proportion,
  proportion-1.96*SE,
  proportion+1.96*SE) | continent, data=d,
 ylab='Year')
yr <- factor(d$year)
yr <- reorder.factor(yr, d$proportion,
 mean)
levels(yr)

search()
detach(2)
getHdata(titanic3)
page(describe(titanic3),multi=T)
datadensity(titanic3)
attach(titanic3)
rm(age)
plsmo(age, survived, datadensity=T)
bwplot(pclass ~ age | sex)
s <- summary(survived ~
 age + sex + pclass)
plot(s,cex=1.5)
plsmo(age, survived,
 group=interaction(pclass,sex), 
 col=1:6, datadensity=T)

# Assignment 8
search()
detach(2)
# Problem 1
datadensity(diabetes)
# plot(diabetes)
hist.data.frame(diabetes)
ecdf(diabetes, group=diabetes$gender,
  label.curve=F)


# Problem 2
nac <- naclus(diabetes)
plot(nac)
naplot(nac)

# Problem 3
plot(varclus(~., data=diabetes))

# Problem 4
bwplot(cut2(age,g=5) ~ glyhb |
  gender*frame, panel=panel.bpplot,
  data=diabetes)

# Problem 5
attach(diabetes)
s <- summary(glyhb ~ age + gender +
 height + weight + chol, fun=quantile)
plot(s, which=2:4, pch=c(91,16,93),
 xlab=label(glyhb))
# Shows error bars using 
# plot.summary.formula.response
# Instead do cross-classified analysis
s <- summarize(glyhb,
  llist(age=cut2(age,g=3),gender,
        weight=cut2(weight,g=2)),
  smedian.hilow, conf.int=.5)
Dotplot(age~Cbind(glyhb,Lower,Upper)|
  gender*weight, data=s)
xYplot(glyhb ~ age | gender*cut2(weight,g=2),
 method='quantile', data=diabetes)