PengchengLuRScript < Main < Vanderbilt Biostatistics Wiki

You are here: Vanderbilt Biostatistics Wiki>Main Web>TWikiUsers>PengchengLu>PengchengLuRScript (14 Jun 2009, PengchengLu)EditAttach

My Sample R Script of Batch-Preprocessing Two-color cDNA Microarray Data

slide.data.profile=function(datapath){ setwd(datapath) # datapath is the name of the directory where cDNA row data sets are saved Imagene.files <- list.files(pattern="-mod.txt$") cat(Imagene.files,sep="\n", "\n") ## show slide files channel.names <- gsub("-mod.txt","",Imagene.files)

d.list<-vector("list",length(Imagene.files)) for (i in 1:length(Imagene.files)){ d.listi<-read.Imagene.function(Imagene.files[i]) } nonn<-data.matrix(data.frame(d.list)) dimnames(nonn)2<-channel.names

col.names<-NULL for (i in 1:length(channel.names)){ channel.info<-strsplit(channel.names,"-")i next.col.name<-paste(channel.info[1],channel.info[2],sep="-") col.names<-c(col.names,next.col.name) } dimnames(nonn)2<-col.names

d.list<-vector("list",length(Imagene.files)) for (i in 1:length(Imagene.files)){ d.listi<-read.intensity.raw(Imagene.files[i]) } raw<-data.matrix(data.frame(d.list)) dimnames(raw)2<-channel.names

slide.order.file<-read.delim("slide.order.txt", header=0) # read in slide.order file s<-t(slide.order.file) s<-as.vector(s)

reorder.raw<-raw[,rank(s)] cat(colnames(reorder.raw),sep="\n", "\n")

reorder.nonn<-nonn[,rank(s)] cat(colnames(reorder.nonn),sep="\n", "\n")

## normalize bc data, recall Dr. Dan Nettleton's msn() function source("...\msn.R") norm<-msn(reorder.nonn)

## Scatter plot and MA plot for each slide batch.MA.plot(log.0(nonn), norm)

## side-by-side boxplot for raw data and normalized data opendevice.raw<-function(){ pdf(file="side-by-side boxplot for logged raw data.pdf", width=297/25.4, height=210/25.4) } opendevice.raw.reordered<-function(){ pdf(file="side-by-side boxplot for reordered logged raw data.pdf", width=297/25.4, height=210/25.4) } opendevice.nonn<-function(){ pdf(file="side-by-side boxplot for logged bc data.pdf", width=297/25.4, height=210/25.4) } opendevice.nonn.reordered<-function(){ pdf(file="side-by-side boxplot for reordered logged bc data.pdf", width=297/25.4, height=210/25.4) } opendevice.norm<-function(){ pdf(file="side-by-side boxplot for reordered normalized data.pdf", width=297/25.4, height=210/25.4) } closedevice<-function(){ dev.off() }

side.by.side.raw.boxplot<-function(dat){ opendevice.raw() boxplot(as.data.frame(log.0(dat)),medlwd=1, medcol="blue", main="side-by-side boxplot for logged raw data", xlab="channel", ylab="log raw signal",axes=F) axis(1,labels=1:ncol(dat),at=1:ncol(dat)) axis(2) box() closedevice() }

side.by.side.reordered.raw.boxplot<-function(dat){ opendevice.raw.reordered() boxplot(as.data.frame(log.0(dat)),medlwd=1, medcol="blue", main="side-by-side boxplot for reordered logged raw data", xlab="channel", ylab="log raw signal",axes=F) axis(1,labels=1:ncol(dat),at=1:ncol(dat)) axis(2) box() closedevice() }

side.by.side.bc.boxplot<-function(dat){ opendevice.nonn() boxplot(as.data.frame(log.0(dat)),medlwd=1, medcol="blue", main="side-by-side boxplot for logged bc data", xlab="channel", ylab="log bc signal",axes=F) axis(1,labels=1:ncol(dat),at=1:ncol(dat)) axis(2) box() closedevice() }

side.by.side.reordered.bc.boxplot<-function(dat){ opendevice.nonn.reordered() boxplot(as.data.frame(log.0(dat)),medlwd=1, medcol="blue", main="side-by-side boxplot for reordered logged bc data", xlab="channel", ylab="log bc signal",axes=F) axis(1,labels=1:ncol(dat),at=1:ncol(dat)) axis(2) box() closedevice() }

side.by.side.norm.boxplot<-function(dat){ opendevice.norm() boxplot(as.data.frame(dat),medlwd=1, medcol="blue", main="side-by-side boxplot for normalized reordered data", xlab="channel", ylab="normalized signal",axes=F) axis(1,labels=1:ncol(dat),at=1:ncol(dat)) axis(2) box() closedevice() }

side.by.side.raw.boxplot(raw) ## side-by-side boxplot for raw data side.by.side.reordered.raw.boxplot(reorder.raw) ## side-by-side boxplot for reordered raw data side.by.side.bc.boxplot(nonn) ## side-by-side boxplot for bc data side.by.side.reordered.bc.boxplot(reorder.nonn) ## side-by-side boxplot for reordered bc data side.by.side.norm.boxplot(norm) ## side-by-side boxplot for normalized data return(raw, nonn, reorder.raw, reorder.nonn, norm)

}

# The following 5 functions will be recalled

read.Imagene.function<-function(txt.file){ text.file<-read.delim(txt.file, header = TRUE, sep = "\t") bc.intensity<-text.file$Signal.Mean-text.file$Background.Median return(bc.intensity) }

read.intensity.raw<-function(txt.file){ text.file<-read.delim(txt.file, header = TRUE, sep = "\t") sig.intensity<-text.file$Signal.Mean return(sig.intensity) }

batch.MA.plot<-function(raw,norm){ n<-1:ncol(raw) m<-c(1:(0.5*length(n))) m<-2*m n[-m]

for (i in n[-m]){ x11() par(mfrow=c(2,2), oma=c(2,0,3,0)) ch.comp.plot(raw[,i], raw[,i+1]) ch.comp.plot(norm[,i],norm[,i+1]) title(paste("Scatter Plot and M-A Plot_Slide",(i+1)/2), cex=1.2,outer=T) mtext("for logged bc data vs Normalized data", cex=0.8, outer=T) f<-paste("MA plot for slide",(i+1)/2) savePlot(filename=f,type="pdf",device = dev.cur()) } }

ch.comp.plot <- function(x, y) { par(pty = "s") r <- range(c(x, y)) plot(x, y, xlim = r, ylim = r, pch=20, cex=0.5, main="Scatter Plot") lines(r, r, col="blue") lines(lowess(x,y, f=0.01),lwd=1,col="red") par(pty = "m") M <- y - x A <- 0.5 * (x + y) plot(A, M, pch=20, cex=0.5, main="M-A Plot") lines(c(-10, 300), c(0, 0), col="blue") lines(lowess(A,M,f=0.01),lwd=1,col="red") }

log.0<-function(x){ x[x<0]<-0 y<-log(1+x) return(y) }

My modified R code of reporting Up-regulated and Down-regulated genes separately at .html format for Conte project

limma2annsepUP<-function(eset, fit, design, contrast, lib, adjust = "fdr", anncols = aaf.handler()[c(1:3, 6:7, 9:12)], number = 30, pfilt = NULL, fldfilt = NULL, tstat = TRUE, pval = TRUE, FC = TRUE, expression = TRUE, html = TRUE, text = FALSE, save = FALSE, addname = NULL, interactive = TRUE){

if(interactive){ limma2annsepUP.na(eset = eset, fit = fit, design = design, contrast = contrast, lib = lib, adjust = adjust, anncols = anncols, number = number, pfilt = pfilt, fldfilt = fldfilt, tstat = tstat, pval = pval, FC = FC, expression = expression, html = html, text = text, save = save, addname = addname) }else{ require(annaffy, quietly = TRUE)

tables <- vector("list", dim(contrast)[2]) for(i in seq(along = colnames(contrast))){ tablesi <- tableFilt(fit, coef = i, number = number, pfilt = pfilt, fldfilt = fldfilt, adjust = adjust) }

##Check to see if the table dimensions are ok for the end user ##This part needs some error handling... rn <- vector() for(i in 1:length(tables))rn[i] <- dim(tablesi)[1] cat("\nYou are going to output", length(tables),"tables,", "\nWith this many genes in each:\n", rn) cat("\n") doit <- getans2("Do you want to accept or change these values?", allowed=c("a","c")) if(doit == "c"){ dopval <- getans2("Do you want to change the p-value?", allowed=c("y","n")) if(dopval == "y") pfilt <- getpfil() dofld <- getans2("Do you want to change the fold filter?", allowed = c("y", "n")) if(dofld == "y") fldfilt <- getfldfil() limma2annaffy(eset=eset, fit=fit, design=design, contrast=contrast, lib=lib, anncols=anncols, pfilt=pfilt, fldfilt=fldfilt, adjust=adjust, tstat=tstat, pval=pval, FC=FC, expression=expression, html=html, text=text) options(show.error.messages = FALSE) stop() } ## Use topTables to make output tables ## Get filename from contrast matrix for(i in 1:length(tables)){ if(dim(tablesi)[1]>0){ index <- as.numeric(row.names(tablesi)) if(length(which(contrast[,i] > 0)) == 1){ grp1 <- which(design[,which(contrast[,i] > 0)] > 0) }else{ grp1 <- which(rowSums(design[,which(contrast[,i] > 0)]) > 0) } if(length(which(contrast[,i] < 0)) == 1){ grp2 <- which(design[,which(contrast[,i] < 0)] > 0) }else{ grp2 <- which(rowSums(design[,which(contrast[,i] < 0)]) > 0) } grps <- c(grp1, grp2)

filename <- colnames(contrast)[i] if(is.null(addname)) filename <- paste(filename, addname, sep=" ") ## Remove illegal characters from filename if(length(grep("[/|\\|?|*|:|<|>|\"|\\|]", filename)) > 0) warning(paste("Some illegal characters have been removed from the filename", filename, sep = " "), call. = FALSE) filename <- gsub("[/|\\|?|*|:|<|>|\"|\\|]", "", filename)

## Make aafTable object probeids <- featureNames(eset)[index] anntable <- aafTableAnn(probeids, lib, anncols) if(tstat) testtable <- aafTable("t-statistic" = round(tablesi[,"t"],2)) if(pval){ if(exists("testtable")){ testtable <- aafTable("p-value" = round(tablesi[,"adj.P.Val"],10)) }else{ testtable <- merge(testtable, aafTable("p-value" = round(tablesi[,"adj.P.Val"],10))) } } if(FC){ fld <- tablesi[,2] if(exists("testtable")){ testtable <- aafTable("fc" = round(2^fld, 3)) }else{ testtable <- merge(testtable, aafTable("fc" = round(2^fld, 3))) } }

if(exists("testtable")) anntable <- merge(anntable, testtable)

if(expression) exprtable <- aafTableInt(eset[,grps], probeids=probeids)

if(exists("exprtable")) anntable <- merge(anntable, exprtable)

if(html)T saveHTML(anntable, paste(filename,"html", sep="."), filename) if(text) saveText(anntable, paste(filename, "txt", sep="."), header=TRUE) } } if(save) return(tables) } }

limma2annsepUP.na<-function(eset, fit, design, contrast, lib, adjust = "fdr", anncols = aaf.handler()[c(1:3, 6:7, 9:12)], number = 30, pfilt = NULL, fldfilt = NULL, tstat = TRUE, pval = TRUE, FC = TRUE, expression = TRUE, html = TRUE, text = FALSE, save = FALSE, addname = NULL){

require(annaffy, quietly = TRUE)

tables <- vector("list", dim(contrast)[2]) for(i in seq(along = colnames(contrast))){ tablesi <- tableFilt(fit, coef = i, number = number, pfilt = pfilt, fldfilt = fldfilt, adjust = adjust) } ## Use topTables to make output tables ## Get filename from contrast matrix for(i in 1:length(tables)){ if(dim(tablesi)[1]>0){ index <- as.numeric(row.names(tablesi)) if(length(which(contrast[,i] > 0)) == 1){ grp1 <- which(design[,which(contrast[,i] > 0)] > 0) }else{ grp1 <- which(rowSums(design[,which(contrast[,i] > 0)]) > 0) } if(length(which(contrast[,i] < 0)) == 1){ grp2 <- which(design[,which(contrast[,i] < 0)] > 0) }else{ grp2 <- which(rowSums(design[,which(contrast[,i] < 0)]) > 0) } grps <- c(grp1, grp2)

## Make aafTable object probeids <- featureNames(eset)[index] anntable <- aafTableAnn(probeids, lib, anncols) if(tstat) testtable <- aafTable("t-statistic" = round(tablesi[,"t"],2)) if(pval){ if(exists("testtable")){ testtable <- aafTable("p-value" = round(tablesi[,"adj.P.Val"],10)) }else{ testtable <- merge(testtable, aafTable("p-value" = round(tablesi[,"adj.P.Val"],10))) } } if(FC){ fld <- tablesi[,2] if(exists("testtable")){ testtable <- aafTable("fc" = round(2^fld, 2)) }else{ testtable <- merge(testtable, aafTable("fc" = round(2^fld, 2))) } }

if(exists("testtable")) anntable <- merge(anntable, testtable) anntable<-anntable[anntable$fc>1,]

if(expression) exprtable <- aafTableInt(eset[,grps], probeids=probeids)

if(exists("exprtable")) anntable <- merge(anntable, exprtable)

if(html) saveHTML(anntable, paste(filename,"html", sep="."), filename) if(text) saveText(anntable, paste(filename, "txt", sep="."), header=TRUE) } } if(save) return(tables) }

#################################################################################################

limma2annsepDOWN<-function(eset, fit, design, contrast, lib, adjust = "fdr", anncols = aaf.handler()[c(1:3, 6:7, 9:12)], number = 30, pfilt = NULL, fldfilt = NULL, tstat = TRUE, pval = TRUE, FC = TRUE, expression = TRUE, html = TRUE, text = FALSE, save = FALSE, addname = NULL, interactive = TRUE){

if(interactive){ limma2annsepDOWN.na(eset = eset, fit = fit, design = design, contrast = contrast, lib = lib, adjust = adjust, anncols = anncols, number = number, pfilt = pfilt, fldfilt = fldfilt, tstat = tstat, pval = pval, FC = FC, expression = expression, html = html, text = text, save = save, addname = addname) }else{ require(annaffy, quietly = TRUE)

tables <- vector("list", dim(contrast)[2]) for(i in seq(along = colnames(contrast))){ tablesi <- tableFilt(fit, coef = i, number = number, pfilt = pfilt, fldfilt = fldfilt, adjust = adjust) }

if(exists("testtable")) anntable <- merge(anntable, testtable)

if(expression) exprtable <- aafTableInt(eset[,grps], probeids=probeids)

if(exists("exprtable")) anntable <- merge(anntable, exprtable)

if(html)T saveHTML(anntable, paste(filename,"html", sep="."), filename) if(text) saveText(anntable, paste(filename, "txt", sep="."), header=TRUE) } } if(save) return(tables) } }

limma2annsepDOWN.na<-function(eset, fit, design, contrast, lib, adjust = "fdr", anncols = aaf.handler()[c(1:3, 6:7, 9:12)], number = 30, pfilt = NULL, fldfilt = NULL, tstat = TRUE, pval = TRUE, FC = TRUE, expression = TRUE, html = TRUE, text = FALSE, save = FALSE, addname = NULL){

require(annaffy, quietly = TRUE)

## Make aafTable object probeids <- featureNames(eset)[index] anntable <- aafTableAnn(probeids, lib, anncols) if(tstat) testtable <- aafTable("t-statistic" = round(tablesi[,"t"],2)) if(pval){ if(exists("testtable")){ testtable <- aafTable("p-value" = round(tablesi[,"adj.P.Val"],10)) }else{ testtable <- merge(testtable, aafTable("p-value" = round(tablesi[,"adj.P.Val"],10))) } } if(FC){ fld <- tablesi[,2] if(exists("testtable")){ testtable <- aafTable("fc" = round(2^fld, 2)) }else{ testtable <- merge(testtable, aafTable("fc" = round(2^fld, 2))) } }

if(exists("testtable")) anntable <- merge(anntable, testtable) anntable<-anntable[anntable$fc<1,]

if(expression) exprtable <- aafTableInt(eset[,grps], probeids=probeids)

if(exists("exprtable")) anntable <- merge(anntable, exprtable)

if(html) saveHTML(anntable, paste(filename,"html", sep="."), filename) if(text) saveText(anntable, paste(filename, "txt", sep="."), header=TRUE) } } if(save) return(tables) }

Topic revision: r5 - 14 Jun 2009, PengchengLu

Main

Department Home Page

Biostatistics Graduate Program

Vanderbilt University Medical Center

Biostatistics Webs
- Archive
- Main
- Sandbox
- System

Copyright © 2013-2022 by the contributing authors. All material on this collaboration platform is the property of the contributing authors.
Ideas, requests, problems regarding Vanderbilt Biostatistics Wiki? Send feedback