##########################
## CQS Summer Institute ##
## Biostatistics 1      ##
## Tatsuki Koyama       ##
##########################

## MouseWeight.xls has the data from an experiment.
## Mice 11~20 are in 'Uninfected' group, and mice 21~30 are in 'Infected'
## group.

## 1. Data cleaning ##
## This data sheet requires some cleaning.
## First, we need this file as a .csv (text) file. Save it as
## 'MouseWeight.csv'. (Some of the information gets lost.)
## Second, Mean and SD are not a part of the data. They do not belong here with
## the data. Delete them.
## Third, each mouse needs 'Group' variable. Fill in the column. (There is an
## easy way to do this in R, but copy-pasting is ok for now.)

## Read in the data.
## The file is looked up relative to the current working directory.
mouseWeight <- read.csv('MouseWeight.csv', sep=',')

## Data summary.
## Let's get some summaries of the data.
## groupSum() function is useful.
## NOTE(review): the two source() calls below execute remote code fetched over
## plain HTTP; consider keeping a reviewed local copy of these files instead.
source('http://biostat.mc.vanderbilt.edu/wiki/pub/Main/TatsukiRcode/RFunctions071817.R')
groupSum(mouseWeight$Baseline, mouseWeight$Group)

## Let's look at the data.
source('http://biostat.mc.vanderbilt.edu/wiki/pub/Main/TatsukiRcode/RFunctions0.R')
## Some documentation on http://biostat.mc.vanderbilt.edu/wiki/Main/TatsukiRcode
tplot(Baseline ~ Group, data=mouseWeight)
## Try different options...
tplot(Baseline ~ Group, data=mouseWeight, type='db', show.n=TRUE, ylim=c(10,18))
## What do you notice?

## Q1 Test whether the baseline weight is different (on average) between the groups.
## Write the hypotheses.
## The first two calls run the same t-test; the second uses the data= form of
## the formula interface instead of spelling out mouseWeight$ twice.
t.test(mouseWeight$Baseline ~ mouseWeight$Group)
t.test(Baseline ~ Group, data=mouseWeight)
wilcox.test(mouseWeight$Baseline ~ mouseWeight$Group)
## What can you say?

## Q1 Test whether 'infected' mice, on average, lose more weight by Day 1.
## Q1a. Write the hypotheses.
##
## Q1b. Create a column 'change1', which is weight change from Baseline to Day 1.
## Weight change from Baseline to Day 1 (negative values mean weight loss).
mouseWeight$change1 <- mouseWeight$Day1 - mouseWeight$Baseline
groupSum(mouseWeight$change1, mouseWeight$Group, Combined=TRUE, Test=TRUE)
tplot(change1 ~ Group, data=mouseWeight, show.n=TRUE, median.line=TRUE)
title('Weight change', adj=0)
## One-sided test: with a formula, the difference is (first group level) minus
## (second group level).
## NOTE(review): 'less' matches "infected lose more" only if 'Infected' sorts
## before 'Uninfected' in Group -- confirm the coding used in the .csv file.
t.test(change1 ~ Group, data=mouseWeight, alternative='less')
## to get 90% confidence interval...
t.test(change1 ~ Group, data=mouseWeight, alternative='two.sided', conf.level=0.90)

## Q1c. How about using the fold-change?
## Fold-change is the ratio Day1 / Baseline (values below 1 mean weight loss).
mouseWeight$foldChange1 <- mouseWeight$Day1 / mouseWeight$Baseline
groupSum(mouseWeight$foldChange1, mouseWeight$Group, Combined=TRUE, Test=TRUE)
tplot(foldChange1 ~ Group, data=mouseWeight, show.n=TRUE, median.line=TRUE)
title('Weight fold-change', adj=0)
t.test(foldChange1 ~ Group, data=mouseWeight, alternative='less')

## Q2
## The following data were reported in
## Preliminary Report: Findings from the Aspirin Component of the Ongoing
## Physicians' Health Study. N. Engl. J. Med. 318:262-264, 1988.
## Rows are treatment groups; columns are (attack, no attack) counts.
tab <- data.frame(rbind(
    c(18+171, 10845),
    c(5+99, 10933)
))
row.names(tab) <- c('Placebo','Aspirin')
names(tab) <- c('Fatal and Non-fatal Attack', 'No Attack')

## Q2a Find the proportions of heart attacks for each treatment group.
## tab[,1] / (tab[,1] + tab[,2])
##
## 189 / 11034 and 104 / 11037

## Q2b Find the odds of heart attacks for each treatment group.
## tab[,1] / tab[,2]

## Q2c Find the odds ratio and interpret the number.
## odds <- tab[,1] / tab[,2]
## odds.ratio <- odds[1] / odds[2]
##
## The odds of heart attack are 1.83 times as high for those taking
## placebo as for those taking aspirin.

## Conduct a Fisher's exact test to test the odds are the same.
## fisher.test(tab)

## Conduct a chi-sq test to test P[Heart Attack] are the same.
## chisq.test(tab, correct=FALSE)