This dataset is courtesy of Patrick Royston and Willi Sauerbrei. It is the official version of a dataset from the website for their book Royston P, Sauerbrei W,
Multivariable Model-Building: A Pragmatic Approach to Regression Analysis Based on Fractional Polynomials for Modelling Continuous Variables, Wiley, Chichester, 2008. Details of the dataset are on pp. 262-263 of their book. Some redundant variables from the tab-delimited ASCII version of the dataset have been deleted. The R file was created with the code below.
library(Hmisc)
# Build the gbsg data frame from the tab-delimited ASCII file, check which
# variables duplicate one another, drop the duplicates, then save the result
# and write an HTML summary of its contents.

# Import the raw tab-delimited file.
gbsg <- csv.get('gbsg_ba_ca.dat', sep='\t')

# Reset every variable's label to the empty string.
# seq_along() is used instead of 1:length() so that an empty object yields
# zero iterations rather than looping over c(1, 0).
for (i in seq_along(gbsg)) {
  label(gbsg[[i]]) <- ''
}

# Diagnostic tabulations, run before any variable is dropped:
#   X.d against censrec
with(gbsg, table(X.d, censrec))
#   ratio of rectime to X.t, rounded to 2 decimals
with(gbsg, table(round(rectime / X.t, 2)))
#   grade against the pasted pair (gradd1, gradd2)
with(gbsg, table(grade, paste(gradd1, gradd2)))

# Redundancy analysis on all variables, before the clean-up.
redun(~., data=gbsg)

# Rename the X.* variables, drop the listed variables, attach labels that
# record which dropped variable each kept variable corresponds to, and set
# the levels of meno.
# NOTE(review): 'st' and 't0' in `drop` presumably refer to the just-renamed
# X.st and X.t0 (see the note below about their constant values) -- confirm.
gbsg <- upData(gbsg,
               rename = c(X.st='st', X.d='d', X.t='t', X.t0='t0'),
               drop   = c('rectime', 'censrec', 'gradd1', 'gradd2', 'st', 't0'),
               labels = c(d='censrec', t='rectime/365.25',
                          grade='1:gradd1=0 gradd2=0,2:1 0,3:1,1'),
               levels = list(meno=c('postmenopausal', 'premenopausal')))
# Note: t0 was constant at 0, st at 1

# Redundancy analysis again, on the reduced set of variables.
redun(~., data=gbsg)

# Save the data frame and write an HTML description of its contents.
Save(gbsg)
html(contents(gbsg), file='Cgbsg.html')