# Build a LaTeX table listing, for every drug, the set of trial phases it
# appears in and the diseases it is associated with.
#
# Input : "testData.txt", a CSV file read with Hmisc::csv.get(); the script
#         uses its Drug, Phase and Disease columns.
# Output: a LaTeX table produced by Hmisc::latex() with the columns
#         Phase / Drug / Disease, where repeated phase and drug labels are
#         blanked out and rows are grouped by the length of the phase string.
library(Hmisc)

# Read the data ----
x <- csv.get("testData.txt")
y <- x
class(y$Drug)

# Convert every column to character ----
# It is good practice to save the factor levels first, so the column can be
# converted back to a factor later without losing levels.
oldLevels <- levels(y$Drug)
for (n in names(y)) {
  y[[n]] <- as.character(y[[n]])
}

# Get rid of blank spaces, if any ----
# NOTE: this removes every space, including spaces inside a value.
for (n in names(y)) {
  y[[n]] <- gsub(" +", "", y[[n]])
}

# Find which phases occur for each drug ----
# drph : per drug, the sorted unique phases
# drphS: per drug, those phases collapsed into one comma-separated string
drph <- tapply(y$Phase, y$Drug, function(x) {
  y <- unique(x)
  y[order(y)]
})
drphS <- vapply(drph, function(x) paste(x, collapse = ","), character(1))

# Attach the new variable ("drph") to the data frame ----
# way 1:
# y = merge(y, data.frame(Drug=names(drphS), drph = drphS), by="Drug", all.x=TRUE)
# way 2: vectorised lookup by drug name. match() yields NA for a drug that has
# no entry in drphS instead of raising a subscript error.
y$drph <- unname(drphS[match(y$Drug, names(drphS))])

# Get rid of the "Phase" variable ----
# (or we could just create a new data.frame without "Phase")
# way 1:
# y$Phase = NULL
# way 2:
y <- y[, c("drph", "Drug", "Disease")]

# Get rid of duplicated rows and order the data frame ----
# Primary key is the number of characters in "drph". "drph" itself is the
# second key so that all rows sharing a phase string are contiguous; without
# it, drugs with different phase strings of equal length interleave and the
# label blanking below attaches rows to the wrong phase.
y <- unique(y)
y$nch <- nchar(y$drph)
y <- y[order(y$nch, y$drph, y$Drug, y$Disease), ]

# Create the table ----
# Blank out repeated labels so each phase string / drug is printed only once.
y$drphStr <- ifelse(duplicated(y$drph), "", y$drph)
y$DrugStr <- ifelse(duplicated(y[, c("drph", "Drug")]), "", y$Drug)

# n.rgroup receives the number of rows for each distinct value of nch.
latex(
  y[, c("drphStr", "DrugStr", "Disease")],
  colheads = c("Phase", "Drug", "Disease"),
  rowname = NULL,
  n.rgroup = tapply(y$nch, y$nch, length)
)