---
output: pdf_document
---

## Manipulating Vectors

Modify the following character vector to keep only street names, then sort and remove duplicates.

```{r}
x <- c(
  "120 Main St", "231 Walnut Grove", "374 Central Pk",
  "402 Providence Ln", "555 Central Pk"
)
# Strip only the leading house number and the whitespace after it, so digits
# that belong to the street name itself are left alone.
sort(unique(sub("^[0-9]+\\s+", "", x)))
```

How could you sum all of the numbers between 1 and 1,000 that are evenly divisible by 3 or 5? What about numbers between 1 and 100,000 divisible by 4, 7, or 13?

```{r}
# sum [1,10] divisible by 3 or 5
3 + 5 + 6 + 9 + 10
# Build the multiples of each divisor, then drop values that are multiples of
# more than one divisor (e.g. 15) so they are only counted once.
sum(unique(c(seq(3, 1000, by = 3), seq(5, 1000, by = 5))))
sum(unique(c(
  seq(4, 100000, by = 4),
  seq(7, 100000, by = 7),
  seq(13, 100000, by = 13)
)))
```

## Manipulating Data Frames

```{r}
size <- 1000
set.seed(475)
# One row per visit: a patient id in 1..100 and a day offset in 1..730.
x <- data.frame(
  id = sample(100, size, replace = TRUE),
  visitdate = sample(365 * 2, size, replace = TRUE)
)
# Per-patient attributes, indexed by id (element i belongs to id == i).
male <- sample(c(0, 1, NA), 100, replace = TRUE, prob = c(45, 45, 10))
age <- round(runif(100, 40, 80))
# create male and age columns inside data.frame
# for example - use male[1] where id==1
# convert visitdate to Date field - assume day 0 is 2010-01-01
# order data.frame by id and visitdate
# show females age 75 or older
# display mean age by gender

# Join the per-patient vectors onto the visits by id.
x <- merge(x, cbind(male, id = seq(100)))
x <- merge(x, cbind(age, id = seq(100)))
# Day offsets become calendar dates counted from 2010-01-01.
x[, "visitdate"] <- as.Date(x[, "visitdate"], origin = "2010-01-01")
x <- x[with(x, order(id, visitdate)), ]
row.names(x) <- NULL
head(x)

# Females (male == 0) aged 75 or older. which() drops the rows where male is
# NA instead of returning all-NA rows for them.
x[which(x$male == 0 & x$age >= 75), ]

# Mean age by gender, counting each patient once rather than once per visit.
# Patients with unknown gender (NA) are left out by tapply().
patients <- unique(x[, c("id", "male", "age")])
tapply(patients$age, patients$male, mean)
```

## Writing Functions

Celsius to Fahrenheit: $f(x) = (x*9/5) + 32$

Celsius to Kelvin: $f(x) = x + 273.15$

Write a temperature conversion function. It should take a vector of temperatures, the `from` type, and the `to` type.

```{r}
# test temp function with this data
set.seed(20)
x <- round(rnorm(30, 10, 10))

# Convert a numeric vector of temperatures between scales.
#   x    : numeric vector of temperatures
#   from : scale of the input, one of "C", "F" or "K"
#   to   : scale of the output, one of "C", "F" or "K"
# Returns a numeric vector the same length as x. An unrecognised scale is an
# error rather than being silently treated as Celsius.
temp <- function(x, from = "C", to = "F") {
  scales <- c("C", "F", "K")
  from <- match.arg(from, scales)
  to <- match.arg(to, scales)
  # Normalise to Celsius first so only conversions to/from Celsius are needed.
  celsius <- switch(from,
    C = x,
    F = (x - 32) * 5 / 9,
    K = x - 273.15
  )
  switch(to,
    C = celsius,
    F = celsius * 9 / 5 + 32,
    K = celsius + 273.15
  )
}

xf <- temp(x, from = "C", to = "F")
xk <- temp(x, from = "C", to = "K")
# Round trip: Fahrenheit -> Kelvin must agree with Celsius -> Kelvin.
all.equal(temp(xf, from = "F", "K"), xk)
```

## Generating Plots

Given the following data set, use `ggplot2` and `qplot` and create several plots.

```{r}
library(ggplot2)
library(Hmisc)
getHdata(vlbw)
# scatterplot gest VS bwt
qplot(gest, bwt, data = vlbw)
# scatterplot gest VS bwt, add color and shape using variable sex
qplot(gest, bwt, data = vlbw, shape = sex, col = sex)
# boxplot of bwt by sex
qplot(sex, bwt, data = vlbw, geom = "boxplot")
# scatterplot of gest VS bwt, facet by race
qplot(gest, bwt, data = vlbw, facets = race ~ .)
# scatterplot of gest VS bwt, add regression line
# The smoother is added as its own layer so that method/formula reach only
# geom_smooth() and are not also passed to the point layer.
qplot(gest, bwt, data = vlbw) +
  geom_smooth(method = "lm", formula = y ~ x)
```