readin <- function(filename) { metamatrix1 <- matrix(c(1,0,0,1,1,0,0,1,0,1,2,1,0,1,1,0,0,1), nrow=6, byrow=T) metamatrix2 <- metamatrix1 %x% metamatrix1 stmatrix2 <- metamatrix2 stmatrix2[apply(stmatrix2, 1, sum) == 1, ] <- 0 #### data processing #### For each family, the first two records must be parents rawdata <- read.table(filename, col.names=c("famid", "mk1", "mk2"), skip=1, na.strings=".") # na.strings="-2", sep=",") nrecord <- dim(rawdata)[1] data1 <- matrix(0,36,9) i <- 1; oldfamid <- -1; skipped <- 0 while (i <= nrecord) { if(rawdata[i,"famid"] == oldfamid) { i <- i + 1; next } oldfamid <- rawdata[i,"famid"] if(oldfamid != rawdata[i+1,"famid"] | oldfamid != rawdata[i+2,"famid"]) { message("Family ", oldfamid, " has less than 3 individuals -- skipped") i <- i + 1 skipped <- skipped + 1; next } ## genotypes of two parents and first offspring mk1p1 <- rawdata[i,"mk1"]; mk2p1 <- rawdata[i,"mk2"] mk1p2 <- rawdata[i+1,"mk1"]; mk2p2 <- rawdata[i+1,"mk2"] mk1o <- rawdata[i+2,"mk1"]; mk2o <- rawdata[i+2,"mk2"] i <- i + 3 if(any(is.na(c(mk1p1, mk1p2, mk2p1, mk2p2)))) { message("Missing parental genotype in family ", oldfamid, " -- skipped") skipped <- skipped + 1; next } ## If current offspring has missing genotypes and the next record is in ## the same family, use the next person as the offspring while(any(is.na(c(mk1o, mk2o))) & oldfamid == rawdata[i,"famid"]) { mk1o <- rawdata[i,"mk1"]; mk2o <- rawdata[i,"mk2"] i <- i + 1 } if(any(is.na(c(mk1o, mk2o)))) { message("All offspring have missing genotypes in family ", oldfamid, " -- skipped") skipped <- skipped + 1; next } if(sum(c(mk1p1,mk1p2,mk2p1,mk2p2,mk1o,mk2o) %in% 0:2) != 6) { message("Genotype different from 0, 1, 2 in family ", oldfamid, " -- skipped") skipped <- skipped + 1; next } ## mating type: 0x0, 0x1, 0x2, 1x1, 1x2, 2x2 coded as 0 to 5 mt1 <- min(mk1p1, mk1p2) + mk1p1 + mk1p2; if(mt1 == 6) mt1 <- mt1 - 1 mt2 <- min(mk2p1, mk2p2) + mk2p1 + mk2p2; if(mt2 == 6) mt2 <- mt2 - 1 if(metamatrix1[mt1+1, mk1o+1] == 0 | metamatrix1[mt2+1, mk2o+1] == 0) stop("Inconsistent genotypes exist for family ", oldfamid) data1[mt1*6+mt2+1, mk1o*3+mk2o+1] <- data1[mt1*6+mt2+1, mk1o*3+mk2o+1] + 1 } message("\nSkipping ", skipped, " families.") message("\n", sum(data1), " trio are retained for analysis, among which ", sum(data1[stmatrix2 > 0]), " are informative.") data1 } TDT2L <- function(filename) { #### Structural matrices ## Trio genotype matrix for one marker ## The 6 rows are: 0x0, 0x1, 0x2, 1x1, 1x2, 2x2; the 3 columns are: 0, 1, 2 metamatrix1 <- matrix(c(1,0,0,1,1,0,0,1,0,1,2,1,0,1,1,0,0,1), nrow=6, byrow=T) ## Trio genotype matrix for two markers ## The 36 rows: (0x0,0x0), (0x0,0x1), ..., (0x0,2x2), (0x1,0x0), ..., (2x2,2x2) ## The 9 columns: (0,0), (0,1), (0,2), (1,0), ..., (2,2) metamatrix2 <- metamatrix1 %x% metamatrix1 ## Two-marker matrix of 91 informative trios stmatrix2 <- metamatrix2 stmatrix2[apply(stmatrix2, 1, sum) == 1, ] <- 0 #### Effect matrix for reduced models: ### The 9 rows are two-marker genotypes (0,0), (0,1), (0,2), (1,0), ..., (2,2) ## No interaction between two markers: 4 parameters ## The columns are effects for (0,1), (0,2), (1,0), (2,0) rdmatrix <- matrix(0,9,4) rdmatrix[c(2,5,8), 1] <- 1 rdmatrix[c(3,6,9), 2] <- 1 rdmatrix[4:6, 3] <- 1 rdmatrix[7:9, 4] <- 1 ## 1st marker effect only: 2 parameters for effects of (1,x), (2,x) rdmatrixm1 <- matrix(0,9,2) rdmatrixm1[4:6, 1] <- 1 rdmatrixm1[7:9, 2] <- 1 ## 2nd marker effect only: 2 parameters for effects of (x,1), (x,2) rdmatrixm2 <- matrix(0,9,2) rdmatrixm2[c(2,5,8), 1] <- 1 rdmatrixm2[c(3,6,9), 2] <- 1 #### Read in real data data1 <- readin(filename) #### Check and re-format data ## checking for inconsistence if (sum(data1[!metamatrix2] != 0)) stop("Inconsistent genotypes exist.") ## identify those cells that will be used in analysis: parental mating types ## that exist in data. nonzero <- ((apply(data1, 1, sum) != 0) %*% t(rep(1,9))) & stmatrix2 ## data in regression format with 91 observations data2 <- data.frame(count=as.numeric(t(data1)), mattype=factor(rep(1:36, each=9)), offgeno=factor(rep(1:9, 36)), offset=as.numeric(t(log(stmatrix2))), rdmat=matrix(rep(t(rdmatrix),36), nrow=36*9, byrow=T), rdmatm1=matrix(rep(t(rdmatrixm1),36), nrow=36*9,byrow=T), rdmatm2=matrix(rep(t(rdmatrixm2),36), nrow=36*9,byrow=T)) data2 <- data2[as.numeric(t(nonzero)) != 0, ] #### data analysis ## full two-locus model, with 8 parameters for genotypes TDT2LF <- glm(count ~ mattype + offgeno, offset=offset, family = poisson, data=data2) ## reduced model: no interaction, with 4 parameters for genotypes TDT2LR <- update(TDT2LF, count ~ mattype + rdmat.1 + rdmat.2+rdmat.3+rdmat.4) ## reduced model: 1st marker only, with 2 parameters for genotypes TDTM1F <- update(TDT2LF, count ~ mattype + rdmatm1.1 + rdmatm1.2) ## reduced model: 2nd marker only, with 2 parameters for genotypes TDTM2F <- update(TDT2LF, count ~ mattype + rdmatm2.1 + rdmatm2.2) #### Output of data analysis: 4 tests (statistics, df, p-values) #### CIs of 8 genotypic effects message("Model deviances") message(paste(" Two-locus model with full interaction:", round(TDT2LF$deviance,4))) message(paste(" without interaction:", round(TDT2LR$deviance,4))) message(paste(" One-locus model for marker 1:", round(TDTM1F$deviance,4))) message(paste(" for marker 2:", round(TDTM2F$deviance,4))) message("\nTest for interaction effect:") tmp = anova(TDT2LR, TDT2LF, test="Chisq") message(" Difference in deviances DF P-value") message(paste(" ", round(tmp[2,4],4), " ", tmp[2,3], " ", signif(tmp[2,5],2))) message("\nTest for second marker effect given first marker:") message(" Alternative model Difference in deviances DF P-value") tmp = anova(TDTM1F, TDT2LF, test="Chisq") message(paste(" with interaction ", round(tmp[2,4],4), " ", tmp[2,3], " ", signif(tmp[2,5],2))) tmp = anova(TDTM1F, TDT2LR, test="Chisq") message(paste(" without interaction ", round(tmp[2,4],4), " ", tmp[2,3], " ", signif(tmp[2,5],2))) message("\nTest for first marker effect given second marker:") message(" Alternative model Difference in deviances DF P-value") tmp = anova(TDTM2F, TDT2LF, test="Chisq") message(paste(" with interaction ", round(tmp[2,4],4), " ", tmp[2,3], " ", signif(tmp[2,5],2))) tmp = anova(TDTM2F, TDT2LR, test="Chisq") message(paste(" without interaction ", round(tmp[2,4],4), " ", tmp[2,3], " ", signif(tmp[2,5],2))) } ##TDT2L("Y:/Studies/Sutcliffe/tdt2genes_20050629b.txt")