library(Hmisc)


# Read the data
# -----------------------------------------------
# `x` keeps the data exactly as returned by csv.get(); `y` is the working
# copy that the rest of the script modifies.
x <- csv.get("testData.txt")
y <- x
# Show the class of the Drug column as it was read in.
class(y$Drug)


# Convert every column to "character".
# It is good practice to save the factor levels first, so that a column can
# be converted back to "factor" without losing levels,
# for example: oldLevels = levels(y$Drug)
# -----------------------------------------------
# Assigning into y[] replaces the columns while keeping y a data frame.
y[] <- lapply(y, as.character)


# Strip blank spaces from every value, if any.
# Note: the pattern matches any run of spaces, so spaces inside a value are
# removed as well, not only leading/trailing ones.
# -----------------------------------------------
y[] <- lapply(y, function(column) gsub(" +", "", column))


# Find which phases occur for each drug
# -----------------------------------------------
# drph:  for every drug, the distinct phases in sorted order (NA last).
# drphS: the same phases collapsed into one comma-separated string per drug,
#        named by drug.
drph <- tapply(y$Phase, y$Drug, function(phases) {
  sort(unique(phases), na.last = TRUE)
})
# vapply() always yields a character vector, whereas sapply() would return
# list() for empty input.
drphS <- vapply(drph, paste, character(1), collapse = ",")


# Attach the per-drug phase string ("drphS") to the data frame as "drph"
# way 1:
# y = merge(y, data.frame(Drug=names(drphS), drph = drphS), by="Drug", all.x=TRUE)
# -----------------------------------------------
# way 2: look up every row's drug in drphS in a single vectorised step.
# Indexing with `[` yields NA for a drug that is not a name of drphS (for
# example an NA drug) instead of stopping with "subscript out of bounds", and
# unname() keeps the drug names from being carried along on the new column.
y$drph <- unname(drphS[y$Drug])


# Drop the "Phase" variable
# (alternatively, build a new data.frame without "Phase")
# way 1:
# y$Phase = NULL
# -----------------------------------------------
# way 2: keep only the three columns that are still needed.
y <- y[c("drph", "Drug", "Disease")]


# Remove duplicated rows and order the data frame:
# first by the number of characters in "drph", then by "drph" itself,
# then by Drug and Disease.
# -----------------------------------------------
y <- unique(y)
y$nch <- nchar(y$drph)
# "drph" has to be a sort key: two different phase strings can have the same
# length, and without it their rows interleave by Drug. Any step that blanks
# repeated "drph" values row by row would then show a drug under the wrong
# phase heading.
y <- y[order(y$nch, y$drph, y$Drug, y$Disease), ]


# Create the table
# -----------------------------------------------
# Display columns: a phase string / drug name is printed only on the first
# row where it occurs; repeated values are replaced by "".
y$drphStr <- ifelse(duplicated(y$drph), "", y$drph)
y$DrugStr <- ifelse(duplicated(y[c("drph", "Drug")]), "", y$Drug)

# n.rgroup holds the number of rows for each distinct value of nch.
# NOTE(review): latex() appears to derive its default title/file name from
# the text of its first argument, so that expression should keep starting
# with `y` -- confirm against the Hmisc documentation before renaming.
latex(
  y[c("drphStr", "DrugStr", "Disease")],
  colheads = c("Phase", "Drug", "Disease"),
  rowname = NULL,
  n.rgroup = tapply(y$nch, y$nch, length)
)