# Sys.setlocale("LC_TIME", "us")

## Requires 'TextBox.R' and 'beatlesongs2012.csv' in the same folder as this script.
source('TextBox.R')

###############
## 2012/3/2  ##
###############
## An old, old version of R might give an error.

##########################
## Color and Font Sizes ##
##########################
## Two layout presets are available; LetterSize selects between them.
LetterSize <- FALSE

if (LetterSize) {
  ## Letter size paper (width = 11 inches / height = 8.5 inches)
  ##  - Too small to read -
  paper.width    <- 11
  paper.height   <- 8.5
  size1          <- 1.2
  size2          <- 0.7
  margins        <- c(5,3,5,3)+.1
  char.expansion <- 0.62
} else {
  ## Poster (width = 28 inches / height = 21 inches)
  paper.width    <- 28
  paper.height   <- 21
  size1          <- 1.7
  size2          <- 1.0
  margins        <- c(7.5,5,7.5,5)+.1
  char.expansion <- 1.0
}

# Background colors
bkgrc1 <- '#fcfbfd'
bkgrc2 <- grey(.95)

# Line and text colors
linec  <- grey(.90)
textc1 <- grey(.20)
textc2 <- grey(.25)
grey1  <- grey(.75)
pointc <- c('black','red','blue','purple')

mono.adjust <- 0.8 # mono font looks bigger compared to serif.

# Version string of the running R; interpolated into the acknowledgement text.
r.version <- paste(R.version$major, R.version$minor, sep='.')

###################
## Read the data ##
###################
d <- read.csv('beatlesongs2012.csv', header=TRUE, as.is=FALSE)
d[['Release.Date']] <- as.Date(as.character(d[['Release.Date']]))
d[['Song']]         <- as.character(d[['Song']])

## Factor levels of Album need to be in chronological order;
## they are taken in order of first appearance in the file.
d[['Album']] <- factor(d[['Album']], levels=unique(d[['Album']]))

## 229 entries on 211 unique entries,
## 210 songs because 'Revolution' (single) and 'Revolution 1' (album) are the same song.
how.many.songs <- 210

x <- d[['Release.Date']]
y <- d[['Length']]

###################
## Jitter the points if the release date and song length are identical.
###################
## Points that share both release date and length would be drawn on top of each
## other. Each such group is spread evenly over [-4, +4] days: a pair gets
## c(-4, 4), a triple gets c(-4, 0, 4), and larger groups are spaced evenly
## within the same span. All other points keep an offset of 0.
rdl <- paste(d$Release.Date, d$Length, sep=',')
rdl.dup <- rdl[duplicated(rdl)]
d$pos2 <- rep(0, nrow(d))  # x-offset in days (non-zero only for coincident points)
for (key in unique(rdl.dup)) {
  pos <- which(rdl == key)
  d$pos2[pos] <- seq(-4, 4, length.out=length(pos))
}

##################
## Release Date ##
##################
# albums: every Album level with more than 5 rows, except the Magical Mystery Tour EP
album.titles <- names(which(table(d$Album) > 5))
album.titles <- album.titles[album.titles != 'Magical Mystery Tour']
album.rd <- unique(d$Release.Date[d$Album %in% album.titles])  ## album release date

# eps (extended play)
ep.titles <- c('Long Tall Sally','Magical Mystery Tour')
ep.rd <- unique(d$Release.Date[d$Album %in% ep.titles])

###########################
## Define plotting range ##
###########################
xrange <- range(d$Release.Date) + c(-60,60)  ## 60-day cushion
yrange <- range(d$Length) * c(0,1.05)        ## Expand a little
yrange[1] <- -20                             ## 20-second cushion
xrange[1] <- as.Date('1962-05-01')

##########
## PLOT ##
##########
## The PDF device opened here stays open for the rest of the script.
pdf('BeatlesongsPoster2012.pdf', width=paper.width, height=paper.height)

## plot parameters
# par(family='serif', mar=margins, bg=bkgrc2, cex=char.expansion)
par(family='serif', mar=margins, bg=bkgrc2, cex=char.expansion)

## Empty plot: sets up the coordinate system only (type='n', no axes, no box).
plot(d$Release.Date, d$Length, type='n', las=1, bty='n',
     xaxt='n', yaxt='n', xlab='', ylab='',
     xlim=xrange, ylim=yrange, xaxs='i', yaxs='i')

## Lighter panel covering the plotting region from y = 0 upward.
rect(xrange[1], 0, xrange[2], yrange[2], col=bkgrc1, border=bkgrc1)

## Axis labels are whole minutes placed at multiples of 60 on the Length scale.
## The left axis is labelled only up to 3; the right axis up to 8.
axis(side=2, at=60*(0:3), labels=0:3, tick=FALSE, las=1, line=-.8, cex.axis=size2)
axis(side=4, at=60*(0:8), labels=0:8, tick=FALSE, las=1, line=-.8, cex.axis=size2)

# ard <- format(album.rd, '%Y/%m/%d')

## grid: one horizontal line per minute, one vertical line per January 1st.
abline(h=60*(0:8), col=linec, lty=1)
ylines <- as.Date(paste(1963:1971, '-1-1', sep=''))
for (yl in as.numeric(ylines)) {
  lines(rep(yl, 2), c(0,600), col=linec, lty=1)
}

## Album/Single Summary (release dates, minimum, maximum song length)
## mi: minimum Length within each Album level, in level order.
mi <- as.vector(vapply(split(d$Length, d$Album), min, numeric(1)))
# mi (assigned just before this point) is the minimum length per album/single.
# ma is the matching maximum length per album/single, in Album level order.
ma <- as.vector(vapply(split(d$Length, d$Album), max, numeric(1)))
hm.rd <- length(mi)                               ## number of Album levels (albums/singles)
all.rd <- d$Release.Date[ !duplicated(d$Album) ]  ## release date of the first row of each Album

## Label year and arrow at the bottom.
n.ylines <- length(ylines)
text(ylines, rep(2.0, n.ylines), substr(ylines,1,4),
     col=textc2, cex=size2, pos=4, offset=0.08)
text(ylines+28, rep(2.5, n.ylines), expression(''%->%''),
     col=textc2, cex=size2, pos=4, offset=0.4)
text(as.Date('1962-5-20'), 2, '1962',
     col=textc2, cex=size2, pos=4, offset=0.08)
text(as.Date('1962-5-20')+28, 2.5, expression(''%->%''),
     col=textc2, cex=size2, pos=4, offset=0.4)

## Label album release date (also the first single)
## The 'Yellow Submarine' date label is drawn 10 days to the right of its release date.
## A logical mask is used instead of a negative index: x[-integer(0)] would
## select nothing, so all labels would vanish if that album were absent.
## NOTE(review): assumes album.titles and album.rd are parallel (one release
## date per album, same order) -- confirm against the data.
is.shift <- album.titles == 'Yellow Submarine'
shift <- which(is.shift)
a.rd.md <- paste( format(album.rd, '%b'), as.numeric(format(album.rd, '%d')) )
if (any(!is.shift)) {
  text(album.rd[!is.shift], -4, a.rd.md[!is.shift],
       xpd=TRUE, adj=0.5, cex=size2, col=textc2)
}
if (any(is.shift)) {
  text(album.rd[is.shift]+10, -4, a.rd.md[is.shift],
       xpd=TRUE, adj=0.5, cex=size2, col=textc2)
}

first.single.date <- d$Release.Date[d$Album=='z01'][1]
fsd <- paste( format(first.single.date,'%b'), as.numeric(format(first.single.date, '%d')) )
text(first.single.date, -4, fsd, xpd=TRUE, adj=0.5, cex=size2, col=textc2)

## Album name and the triangle above
if (any(!is.shift)) {
  text(album.rd[!is.shift], rep(-13.0, sum(!is.shift)), album.titles[!is.shift],
       cex=size2, xpd=TRUE, srt=30, adj=1)
}
if (any(is.shift)) {
  text(album.rd[is.shift], -13, album.titles[is.shift],
       cex=size2, xpd=TRUE, srt=30, adj=1)
}
points(album.rd, rep(-9, length(album.rd)), cex=size2, xpd=TRUE, pch=17)

## Vertical Lines connecting shortest and longest songs within album / single.
for (i in seq_len(hm.rd)) {
  lines(rep(all.rd[i], 2), c(mi[i], ma[i]), col=grey1, lwd=1.5, lend=0)
}

## R/pdf does something funny. Right indent (pos=2) doesn't work properly when there is a letter 'Y' in the word.
## An extra space is added at the end of the word which includes the letter 'Y'.
## Workaround for right-aligned labels containing a capital 'Y':
## append one trailing space to the title per 'Y' it contains.
## The padding is computed for every song (one entry per element of d$Song).
how.many.Y <- vapply(strsplit(d$Song, ''), function(ch) sum(ch == 'Y'), integer(1))
ex.space <- vapply(how.many.Y,
                   function(k) paste(rep(' ', k), sep='', collapse=''),
                   character(1))
d$Song <- paste(d$Song, ex.space, sep='')

## Songs to be named.
name.songs <- d[d$Length >= 260 | d$Length <= 100,]  # Very long or very short
name.songs <- subset(name.songs, !(Album=='Magical Mystery Tour' & Song=='I am the Walrus'))
name.songs <- subset(name.songs, !(Album=='z21' & Song=='Come Together'))

named1 <- d[substr(d$Song,1,5) %in% c('Straw', 'You W'),]  # matched by the first five characters
named2 <- d[d$Album=='z01',]                                # First single
named3 <- d[d$Song=='Bad Boy' | d$Song=='Get Back' | d$Song=='Let It Be',]  # 3 more
name.songs <- rbind(name.songs, named1, named2, named3)

## Writing the song names
## require 'text.with.bg()' function
## Each label is anchored 9 days to the left of its point.
for (i in seq_len(nrow(name.songs))) {
  text.with.bg(name.songs$Release.Date[i]-9, name.songs$Length[i], name.songs$Song[i],
               bkgr=bkgrc1, x.adj=1, width.adj=.3, height.adj=.3, cex=size2, col=textc1)
}

## Song names that require an arrow (manually written)
## arrows(..., code=1) puts the arrowhead at the first coordinate pair.
x1 <- as.Date('1964-04-09')
arrows(as.Date('1964-06-09'), 160, x1, 203,
       length=.05, angle=30, code=1, lty=1, lwd=0.4, col=grey1)
text.with.bg(x1, 205, 'Long Tall Sally (EP)', bkgr=bkgrc1, x.adj=.5,
             width.adj=.3, height.adj=.3, cex=size2, font=3, col=textc1)

x1 <- as.Date('1967-09-28')
arrows(as.Date('1967-11-28'), 158, x1, 110,
       length=.05, angle=30, code=1, lty=1, lwd=0.4, col=grey1)
text.with.bg(x1, 108, 'Magical Mystery Tour (EP)', bkgr=bkgrc1, x.adj=.5,
             width.adj=.3, height.adj=.3, cex=size2, font=3, col=textc1)

x0 <- as.Date('1966-07-26')
x1 <- as.Date('1966-05-05')
arrows(x0, 119.5, x1, 110,
       length=.05, angle=30, code=1, lty=1, lwd=0.4, col=grey1)
text.with.bg(x1, 110, 'And Your Bird Can Sing', bkgr=bkgrc1, x.adj=1,
             width.adj=.3, height.adj=.3, cex=size2, font=1, col=textc1)

arrows(x0+12, 119, x0-20, 100,
       length=.05, angle=30, code=1, lty=1, lwd=0.4, col=grey1)
text.with.bg(x0-20, 100, 'For No One', bkgr=bkgrc1, x.adj=.5,
             width.adj=.3, height.adj=.3, cex=size2, col=textc1)

##########
## text ##
##########
## require text.box() function.
## The line breaks inside the string literals below are part of the text passed
## to text.box(), so continuation lines must stay unindented.
## NOTE(review): '<$name$>' looks like a placeholder expanded by text.box(), and
## '>_' / '(>' look like leftover inline markup -- confirm against TextBox.R.
txt1 <- c(
  'Introduction
The Beatles released <$how.many.songs$> songs, give or take, from their debut single Love Me Do (released October 5, 1962) to their final studio album Let It Be (May 8, 1970), in the space of just under eight years.',
  'This plot shows length versus release date of each of their songs. All release dates are for the UK market with one exception (Bad Boy) discussed later.',
  '

Data and Notation
The closed circles indicate songs included on their 12 studio albums, the titles and release dates of which appear in the abscissa.',
  'The Beatles also released 44 songs as singles, shown here with open circles.',
  'Sixteen songs were released twice each, both as a single and as part of an album; all of these pairs are identical in both versions except for Revolution, GetBack and LetItBe. YellowSubmarine was released three times: first as a single and on Revolver (both on August 5, 1966), and later on Yellow Submarine (January 17, 1969).',
  '

The studio albums and singles encompass all but ten of their <$how.many.songs$> songs.',
  'Two of their 13 extended play albums (EPs), Long Tall Sally and Magical Mystery Tour, consist of songs that were never released elsewhere.',
  'A total of nine songs from these two EPs are also shown in the plot.',# as open circles.',
  'Magical Mystery Tour>_ includes I am the Walrus, which is not shown in the plot because it was previously released as a single.',
  'One song was never a part of any studio album, single, or EP: Bad Boy was released on December 10, 1966, on a compilation album,',
  'A Collection of Beatles Oldies , with 15 other songs, all of which were previously released.',
  'This track was available in the US market more than a year earlier, on June 14, 1965, when it was included in Beatles VI, and it is shown with the release date for the US market.',
  'This track was recorded on the same day as DizzyMissLizzy, which was subsequently included in the studio album Help!.'
)

txt2 <- c(
  '

The black points are songs credited to Lennon-McCartney,',
  'red indicates Harrison as songwriter, the purple originates with other combinations of the Beatles, and the blue points are cover songs.',
  '

To prevent songs with exactly the same length and release date from appearing on top of each other, data points are jittered horizontally when necessary, by a small amount equivalent to four days.',
  'For example, both AndYourBirdCanSing and ForNoOne from Revolver are two minutes and one second long. Also in two instances, a single was released on the exact same day as was an album: once with Beatles For Sale and another with Revolver . These two singles were also moved to the right by four days so the data can be seen.',
  '

Some Observations
The vast majority of songs in the early phase of their career (up to Revolver) are between two and three minutes in length, perhaps reflecting expectations for radio play at the time.',
  'The inclusion of a relatively large number of cover songs during this period may also have been intended to attract a wider audience.',
  'All 23 of their cover songs come from this early phase.',
  '

The end of this early phase is marked by their final concert, which took place on August 29, 1966, 24 days after the release of Revolver.',
  'After this concert, the Beatles started to spend much more time in the studio creating less traditional and radio-friendly songs.',
  'Songs lasting longer than three minutes became commonplace, though they also released many short songs during this same time period.',
  'Furthermore, Harrison\'s contribution was much more prominent during this later phase.'
)

txt3 <- c(
  '

Acknowledgement
Data were gathered from wikipedia.org and modified based on the actual length of songs on CDs, and R version <$r.version$> was used to create the plot.',
  'RafeDonahue made a number of suggestions that greatly improved the plot.',
  'Thanks also go to ShaunHaskins and LynneBerry who edited the discussion of the plot.',
  '

The data and R codes are available upon request via email to the author',
  '(>tatsuki.koyama@vumc.org).'
)

## Three stacked text boxes; each box starts at the y value that get.next.y()
## reports for the previous one. The condition is always TRUE; change it to
## FALSE to skip the text while working on the plot.
if (!FALSE) {
  ww1 <- text.box( tex=txt1, x=c(as.Date('1962-6-11'), as.Date('1967-2-9')), y=507.00,
                   x.padding=c(100,20), y.padding=c(55,2),
                   vertical.stretch=1.8, paragraph.break=1.8,
                   bkgr.col=bkgrc1, justify=FALSE, slide.quote=TRUE, after.=1.75 )
  ny1 <- get.next.y(ww1)

  ww2 <- text.box( tex=txt2, x=c(as.Date('1962-6-11'), as.Date('1966-05-15')), y=ny1[2],
                   x.padding=c(100,20), y.padding=c(5,5),
                   vertical.stretch=1.8, paragraph.break=1.8,
                   bkgr.col=bkgrc1, justify=FALSE, slide.quote=TRUE, after.=1.75 )
  ny2 <- get.next.y(ww2)

  ww3 <- text.box( tex=txt3, x=c(as.Date('1962-6-11'), as.Date('1965-11-01')), y=ny2[2],
                   x.padding=c(100,20), y.padding=c(5,3),
                   vertical.stretch=1.8, paragraph.break=1.8,
                   bkgr.col=bkgrc1, justify=FALSE, slide.quote=TRUE, after.=1.75 )
}

## Title and Name
## apos is only referenced by the commented-out title() variant below.
apos <- strsplit(sQuote(''),'')[[1]][2]
suppressWarnings(
  #title(main=paste('Lengths of the Beatles',apos, ' Songs',sep=''), cex.main=size1*1.2, font.main=2, line=0.4, adj=0)
  title(main=paste("Lengths of the Beatles'", " Songs",sep=""),
        cex.main=size1*1.2, font.main=2, line=0.4, adj=0)
)
title(main='Tatsuki Koyama, PhD', line=2.2, cex.main=size1*1.2, font.main=2, adj=1)
title(main='Department of Biostatistics, Vanderbilt University Medical Center',
      line=0.4, cex.main=size1, font.main=2, adj=1)
title(ylab='Song Length (Minutes)', cex.lab=size1*0.8, mgp=c(1,0,0), font.lab=1, adj=.3)

## Plotting the points finally!
## Rows whose Album is not in album.titles get pch 21; album tracks get pch 19.
## Colour is picked from pointc by the integer code of d$Credits.
## NOTE(review): presumably Credits has four levels matching pointc's order -- confirm against the CSV.
singles <- !(as.character(d$Album) %in% album.titles)
pch.sa <- ifelse(singles, 21, 19)
points(d$Release.Date+d$pos2, d$Length, pch=pch.sa, cex=size2,
       col=pointc[as.integer(d$Credits)])

## Close the PDF device so the file is written out.
dev.off()