tigol <- function(x){exp(x)/(1+exp(x))}

rm.na <- function(mat, which.column=2){
	# removes NA
	wc <- which.column
	mat[!is.na(mat[,wc]),]}


nan <- function(vec){
	# removes NA from a vector
	vec[!is.na(vec)]}

frop <- function(vec, what=c('equal', 'less', 'greater'), val){
	
	# proportion of 'vec' that satisfies < > or = val
	
		eq <- function(x, y){x ==y}
		le <- function(x, y){x < y}
		gr <- function(x, y){x > y}
		
		hm <- switch(what, equal = eq(vec, val), less = le(vec, val), greater = gr(vec, val))
		
		SATISF <- sum(hm)
		LENGTH <- length(vec)
		PROPOR <- round(SATISF / LENGTH, 4)
	
		cbind(SATISF, LENGTH, PROPOR)}
	


cc.mat <- function(mat, col.num=0){
	# Input = matrix 
	# This will separate the mat into two.
	# with or w/o NA in the columns specified by
	# col.num (colum number)
	# Default col.num=0 is 'all the columns'.  

	nr <- dim(mat)[1] ; nc <- dim(mat)[2]

	ID <- seq(1, nr)
	if(sum(col.num)==0) { cn <- seq(1, nc)
	} else { cn <- col.num }

	cc <- complete.cases(mat[,cn])

	if(sum(cc)!=0) { matc <- mat[ID[cc],]
	} else { matc <- NULL }
	if(prod(cc)!=1) { matd <- mat[ID[!cc],]
	} else { matd <- NULL }
	return(list(matc, matd))
}


ttt <- function(matc, fac=1){
	# This will create a 2x2 table of 
	# a specified and all other factors. 
	# Input = matrix of only categorical variables
	# fac = the important factor.  

	ncr <- dim(matc) ; nfac <- ncr[2]
	ans <- as.list(1:nfac)
	for(i in 1:nfac){ ans[[i]] <- table(matc[,i], matc[,fac])}
	list(ans, names(matc))
}


LOGITpx <- function(p){log(p/(1-p))} 
LOGITxp <- function(x){exp(x)/(exp(x)+1)}

find.pos <- function(vec, value){
	# This finds the position of "value" in "vec"
	v <- seq(1, length(vec))
	v[vec==value]}

find.na <- function(vec){
	# This finds the position of NA in 'vec'
	v <- 1:length(vec)
	v[is.na(vec)]}
	
fill.na <- function(vec, v){
	# This replace NA's in vec by v.

	LV <- length(v)
	if(LV != sum(is.na(vec))) { stop('number of NA in vec must be equal to length(v).')}
	
	for(i in 1:LV){vec[find.na(vec)[i]] <- v[i]}
	vec}



find.na.pos <- function(vec, nonNA=F){
	# This find the position of "NA" in "vec"
	# If nonNA=T, then it finds the postions of non "NA"
	v <- seq(1, length(vec)) 
	ans <- v[is.na(vec)]
	if(nonNA){ans <- v[!is.na(vec)]}
	ans
	}


rem.row <- function(mat, row.n){
	# This removes a row from a matrix or a dataframe.
	# The removed row is stored for later use.

	nr <- dim(mat)[1] ; v <- c(1:nr)
	who <- v!=row.n
	stay <- v[who]
	list(mat[stay,], mat[row.n,])
}


normal.check <- function(v){
	# Histogram and overlayed normal density.

	minv <- min(v) ; maxv <- max(v) ; len <- length(v)
	x <- seq(minv, maxv, length=len)
	y <- dnorm(x, mean(x), sd(x))
	hist(v, freq=F)
	lines(x, y, type="l")
}


my.freq.table <- function(v1, v2){	
	# Use my.ft instead!
	
	# Form a 2x2 table from 2 factors.
		v1 <- factor(v1) ; v2 <- factor(v2)
	temp <- table(v1, v2)
		rou <- function(x){round(x,3)}
	r1 <- sum(temp[1,]) ; r2 <- sum(temp[2,]) ; rt <- r1 + r2
	c1 <- sum(temp[,1]) ; c2 <- sum(temp[,2])
	rp11 <- rou(temp[1,1]/r1) ; rp12 <- rou(temp[1,2]/r1) ; rp1 <- rp11 + rp12
	rp21 <- rou(temp[2,1]/r2) ; rp22 <- rou(temp[2,2]/r2) ; rp2 <- rp21 + rp22
	rpt1 <- rou(c1/rt) ; rpt2 <- rou(c2/rt) ; rptt <- rpt1 + rpt2 

	temp4 <- data.frame(matrix(c(rp11, rp21, rpt1, rp12, rp22, rpt2, rp1, rp2, 1), 
		nrow=3), row.names= c(levels(v1), "Total"))
	names(temp4) <- c(levels(v2), "Total")
	temp4}
	
my.table <- function(v1, v2){
	# Use my.ft instead!
	
	# Form a 2x2 table from 2 factors.
		v1 <- factor(v1) ; v2 <- factor(v2)
	temp <- table(v1, v2)
		rou <- function(x){round(x,3)}
	r1 <- sum(temp[1,]) ; r2 <- sum(temp[2,])
	c1 <- sum(temp[,1]) ; c2 <- sum(temp[,2])
	gt <- r1 + r2
	temp2 <- cbind(temp, c(r1, r2))
	temp3 <- rbind(temp2, c(c1, c2, gt))

	temp5 <- my.freq.table(v1, v2) ; temp6 <- my.freq.table(v2, v1)

	n11 <- temp[1,1] ; n12 <- temp[1,2] ; n21 <- temp[2,1] ; n22 <- temp[2,2]
	odds <- (n11*n22)/(n12*n21)
	sighat <- (1/(n11+.5) + 1/(n12+.5) + 1/(n21+.5) + 1/(n22+.5))^.5
	con <- c(log(odds) - qnorm(1-.025) * sighat, log(odds), log(odds) + qnorm(1-.025) * sighat)
	orig <- exp(con)
	
	list(temp3, temp5, temp6, round(con, 4), round(orig, 4))}

	
my.ft <- function(v1, v2, ...){

	# For a rxc table from 2 factors.
	
	var.name1 <- substitute(v1) ; var.name2 <- substitute(v2)
	
		v1 <- factor(v1) ; v2 <- factor(v2)
	temp <- table(v1, v2)
		rou <- function(x){round(x, 4)}
	r.total <- as.numeric(apply(temp, 1, sum))
	c.total <- as.numeric(apply(temp, 2, sum))
	
	temp2 <- rou(temp/r.total)
	temp3 <- rou(t(t(temp)/c.total))
	
	gt1 <- apply(temp, 1, sum) ; gt2 <- c(apply(temp, 2, sum), sum(gt1))
	
	Atemp <- rbind(cbind(temp, gt1), gt2)
		Atem <- data.frame(Atemp, row.names=c(levels(v1), "Total"))
		names(Atem) <- c(levels(v2), "Total")
			
	gtem2 <- apply(temp2, 1, sum) 
		tem2 <- data.frame(cbind(temp2, gtem2))
		names(tem2) <- c(levels(v2), "Total")
	gtem3 <- apply(temp3, 2, sum) 
		tem3 <- data.frame(rbind(temp3, gtem3), row.names=c(levels(v1), "Total"))
		names(tem3) <- c(levels(v2))
	
	list(table(v1), Atem, tem2, tem3, table(v1,v2, ...))

}
	


prop <- function(v01){
	# v01 is a vector of 0's and 1's.
	# calculate proportion of 1.
	sum(v01)/length(v01)}

BtCor <- function(v1, v2, r){
	# find correlation from Bootstrap samples
	rv <- seq(1, rv)
	for(i in rv){rv[i] <- cor(v1, sample(v2, length(v2), T))}
	rv}


SplitMat <- function(mat, col.num){
	# This splits a matrix (data.frame) into two
	# matrices according to a dichotomous w-th column

	nr <- dim(mat)[1] ; nc <- dim(mat)[2]

        ID <- seq(1, nr)

	co <- mat[,col.num]
	w <- unique(co)
	if(length(w) != 2){
	stop("\n", "\n", "Column specified by 'col.num' must be dichotomous.", "\n", "\n")}
	w1 <- w[1] ; w2 <- w[2]
	one <- ID[co==w1] ; two <- ID[co==w2]
		
	m1 <- mat[one,] ; m2 <- mat[two,]
	list(m1, m2)
}


odds.ratio2 <- function(mat, alpha=.05, con.cor =T){
	# mat is a 2x2 matirix
	# con.cor = continuity correction
	
	if(sum(dim(mat) == c(2,2))!=2){stop("\n", "Input must be a 2x2 matrix.", "\n")}
	
	cc <- c(0, .5)[con.cor+1]
	
	n11 <- mat[1,1] ; n12 <- mat[1,2] ; n21 <- mat[2,1] ; n22 <- mat[2,2]
	m11 <- n11 + cc ; m12 <- n12 + cc ; m21 <- n21 + cc ; m22 <- n22 + cc
	
	#theta <- (n11*n22)/(n12*n21)
	theta <- (m11*m22)/(m12*m21)  # with continuity correction
	sighat <- sqrt(1/m11 + 1/m12 + 1/m21 + 1/m22)
	
	za <- qnorm(1-alpha/2)
	log.ci <- round(c(log(theta) - za * sighat, log(theta), log(theta) + za * sighat), 5)
	ori.ci <- round(exp(log.ci), 5)
	rev.or <- round((1/ori.ci)[c(3, 2, 1)], 5)
	
	Log.o.r. <- log(theta)
	ase <- sighat
	zz <- Log.o.r./ase
	pp <- 2*(1-pnorm(abs(zz)))
	
	summ <- data.frame(Log.Odds.Ratio=Log.o.r., ASE=ase, Z=zz, P=pp)
	
	o.r. <- data.frame(round(rbind(log.ci, ori.ci, rev.or), 4))
	names(o.r.) <- c('LWR', 'Point', 'UPR')
	
	
	r.s <- apply(mat, 2, sum)
	c.s <- apply(mat, 1, sum)	
	g.s <- sum(r.s)
	
	ans <- data.frame(cbind(rbind(mat, r.s), c(c.s, g.s)), row.names=c("  1  ", "  2  ", "Total"))
	names(ans) <- c(" A ", " B ", "Total")
	
	list(summ, o.r., ans)
	}
	
odds.ratio <- function(mat, alpha=.05, con.cor=T){
	temp <- odds.ratio2(mat, alpha, con.cor)
	list(temp[[1]], temp[[2]])}

	
	
# see http://aoki2.si.gunma-u.ac.jp/R/kruskal-wallis.html

kruskal.wallis <- function(data, group){

    OK <- complete.cases(data, group)
    r <- rank(data <- data[OK])    
    n <- sum(ni<- table(group <- group[OK]))
    Ri <- tapply(r, group, sum)
    S <- 12*sum(Ri^2/ni)/(n*(n+1))-3*(n+1)
    if (length(unique(data)) != n) {
        tie <- table(data)
        S <- S/(1-sum(tie^3-tie)/(n^3-n))
    }
    df <- (a <- length(ni))-1

    result1 <- data.frame(S, df,  pchisq(S, df, low=F))
    names(result1) <- c("Statistics(Chi-sq. value)", "d.f.", "P value")
    a.mean <- (n+1)/2
    R.mean <- Ri/ni
    V <- sum((r-a.mean)^2)/(n-1)
    S <- outer(R.mean, R.mean, function(x, y){(x-y)^2})/outer(ni, ni, function(n1, n2) { (1/n1+1/n2)*V })
    S <- S[lower.tri(S)]
    g1 <- g2 <- numeric(a*(a-1)/2)
    count <- 0
    for (i in 1:(a-1)) {
        for (j in (i+1):a) {
            count <- count+1
            g1[count] <- i
            g2[count] <- j
        }
    }
    p <- as.numeric(formatC(pchisq(S, df, lower=F), format='f', digits=6, mode='real'))
    result2 <- cbind(S, p)
    rownames(result2) <- paste(g1, g2, sep=":")
    list(result1=result1, result2=result2)
    }

# see http://aoki2.si.gunma-u.ac.jp/R/Cochran-Armitage.html

Cochran.Armitage <- function(r.i, n.i, x.i=1:length(r.i))
{
    stopifnot((k <- length(r.i)) == length(n.i), length(n.i) == length(x.i))

    p.i <- r.i/n.i
    p.m <- (r <- sum(r.i))/(n <- sum(n.i))
    xx <- x.i-(x.m <- sum(n.i*x.i)/n)
    b <- sum(n.i*(p.i-p.m)*xx)/sum(n.i*xx^2)
    a <- p.m-b*x.m
    
    xt <- b^2*sum(n.i*xx^2)/(p.m*(1-p.m))
    xh <- n^2*(sum(r.i^2/n.i)-r^2/n)/r/(n-r)
    xq <- xh-xt
    
    res <- matrix(c(xt, xq, xh, 1, k-2, k-1, 1-pchisq(xt, 1), 1-pchisq(xq, k-2), 1-pchisq(xh, k-1)), nrow=3)
    colnames(res) <- c("Chi-sq.", "d.f.", "P value")
    rownames(res) <- c("Trend", "Quad.", "Homo.")
    res
}





medQ <- function(vec, groups){
	# gives the median and the 1st and 3rd quartiles.
	
	unique.g <- unique(groups)
	ngroup <- length(unique.g)
	temp <- as.list(1:ngroup)
	for(i in 1:ngroup){temp[[i]] <- vec[groups==as.vector(unique.g[i])]}
	
	ans <- matrix(0, nrow=ngroup, ncol=3)
	for(i in 1:ngroup){ans[i,] <- summary(temp[[i]])[c(2,3,5)]}
	ansd <- data.frame(ans, row.names=unique.g)
	names(ansd) <- c('1st Q', 'Median', '3rd Q')
	ansd}


sort.mat <- function(mat, i){
	# this will sort a matrix based on the column i
	# i is a vector of length 1
	mat[order(mat[,i]),]}


sort.mat.re <- function(mat, i, re=T){

	plusminus <- c(1, -1)[re+1]

	mat[order(mat[,i] * plusminus),]}
	

sort.mat3 <- function(mat, sort.by, decreasing = F){
        
        # This will sort a matrix or a data.frame based on the columns 'sort.by.'
        # 'sort.by' and 'decreasing' are  vectors of any length with 
	 # length(sort.by) >= length(decreasing)

        v <- rev(sort.by) ; de <- decreasing
        if( length(de) < length(v) ){de[(length(de)+1) : length(v)] <- F}

        for(i in 1:length(v)){
                mat <- mat[order(mat[,v[i]], decreasing=rev(de)[i]),]}
        
        mat}




sort.mat.col <- function(mat, ord){
	# this will swich columns around.
	# ord has the resulting column order...
	mat[,ord]}
	


check.all <- function(long.v, short.v){
	# This gives T or F for each entry of long.v.
	# T if included in short.v, F otherwise
	
	LL <- length(long.v) ; LS <- length(short.v)

	if(LS == 0){out <- rep(0, LL)}
	else{
	temp <- matrix(0, ncol=LL, nrow=LS)
	for(i in 1:LS){temp[i,] <- long.v == short.v[i]}
	out <- apply(temp, 2, sum)}
	
	out}


check.all.pos <- function(long.v, short.v){
	# Similar to check.all
	# This returns the position
	
	temp <- check.all(long.v, short.v)
	
	out <- c(1:length(long.v))[temp==1]
	if(sum(temp)==0){out <- NA}
	out}	

group.count <- function(vec){
	# This will count the consecutive identical elements.
	# try group.count(c(rep(1, 4), rep(2, 3), rep(3, 5)))
	
	n <- length(vec)
	ans <- vec
		ans[1] <- 1
	for(i in 2:n){
		if (vec[i]==vec[i-1])  ans[i] <- ans[i-1] + 1 else ans[i] <- 1 
		}
	ans}	
	
	
exclude <- function(v, how.many){

	# how.many is how many to exclude
	
	LL <- length(v)
	
	#Trim.Bottom <- sort(v)[(how.many+1):LL]
	#Trim.Top <- sort(v)[1:(LL-how.many)]

	ord <- rank(v)

	#cbind(Trim.Bottom, Trim.Top)}

	wo.sma <- replace(v, (ord<=how.many), NA)
	wo.lar <- replace(v, (ord>(LL-how.many)), NA)
	
	wo.small <- wo.sma[!is.na(wo.sma)]
	wo.large <- wo.lar[!is.na(wo.lar)]
	
	list(cbind(v, wo.sma, wo.lar), cbind(wo.small, wo.large))}


threshold <- function(v, b, below=F){
	# this will return the elements of 'v' that is greater than 'lim'
	# if below=T, then ... is less than 'lim'.

	temp <- function(x, y, bel){(-1)^bel * x > (-1)^bel * y}
	
	id <- seq(1:length(v))
	a <- temp(v, b, below)
	ID <- id[a] ; vec <- v[a]
	cbind(ID, vec)
	}
	
	
	
	
hm.sig <- function(v, c.v.v, smaller=T){
	# this will count the number of elements in v that is 
	# smaller than each element of c.v.v (critical value vector)

	Lcv <- length(c.v.v)
	m <- rep(0, Lcv)
	if(smaller==T) {for(i in 1:Lcv){m[i] <- sum(v<c.v.v[i])}
	} else {for(i in 1:Lcv){m[i] <- sum(v>c.v.v[i])}
	}
	
	if(smaller==T) {out <- cbind(" < "=c.v.v, m) 
	} else { out <- cbind( " > " = c.v.v, m)
	}
	
	out}
	
	

an <- function(v){# this removes NA.
	v[!is.na(v)]}



get.rid.of.it <- function(long.v, short.v){
	# This get rid of 'short.v' from 'long.v'
	
	long.v[check.all(long.v, short.v)==0]}

## Sensitivity, specificity

SenSpe <- function(ttt){
	# Given a 2x2 table, or a 2x2 matrix 
	# This will calculate sensitivity, specificity, 
	# Positive Predicted Value, Negative Predicted Value
	# The entries of the table are
	#   
	#   			Disease Yes  Disease No
	#   Diagnostic Yes         11           21
	#   Diagnostic No          12           22
	#   
	
	x11 <- ttt[1,1] ; x12 <- ttt[1,2] ; x21 <- ttt[2,1] ; x22 <- ttt[2,2]
	rowt1 <- x11 + x12 ; rowt2 <- x21 + x22
	colt1 <- x11 + x21 ; colt2 <- x12 + x22 ; grat <- colt1 + colt2

	Sensitivity <- round(x11/colt1, 4)
	Specificity <- round(x22/colt2, 4)
	PosPredValu <- round(x11/rowt1, 4)
	NegPredValu <- round(x22/rowt2, 4)

	cbind(Sensitivity, Specificity, PosPredValu, NegPredValu)}


## ROC curve ##

if(FALSE){
ROC0 <- function(Disease, Diagnosis){
	# Both of the inputs are vectors
	# Yes = 1 = T / No = 0 = F
	
	BothYes <- sum(Disease * Diagnosis)
	DisYes <- sum(Disease) ; DiaYes <- sum(Diagnosis)
	
	x11 <- BothYes
	x21 <- DiaYes - BothYes
	x12 <- DisYes - BothYes
	x22 <- length(Diagnosis) - x11 - x12 - x21
	
	temp1 <- as.data.frame(matrix(c(x11, x12, x21, x22), ncol=2), row.names=c('Diagnosis Yes', 'Diagnosis No'))
		names(temp1) <- c('Disease Yes', 'Disease No')
	temp2 <- SenSpe(as.matrix(temp1))
	list(temp1, temp2)
	}


Diag <- function(obs, cutp){
	# This will give a diagnosis based on the observation (obs) and the cutpoint (cutp).
	# obs = cutp will be Diagnosis = Yes.
	
	obs <= cutp}


Diagv <- function(obs, cutpv){
	# See Diag.  Cutpoint is now a vector.
	len <- length(cutpv)
	ans <- matrix(0, nrow=length(obs), ncol=len)
	for(i in 1:len){ans[,i] <- Diag(obs, cutpv[i])}
	#ansd <- data.frame(ans) ; names(ansd) <- cutpv
	ans}


ROC1 <- function(Disease, Diagnosis){
	# This gives a simpler output than ROC0
	# It only returns Sensitivity and Specificity so that 
	# they can be used in a ROC Curve plot.
	temp <- ROC0(Disease, Diagnosis)
	temp[[2]][1,c(1,2)]}




exp.tab <- function(mat){
	# mat is a matrix
	overall <- sum(mat)
	col.sum <- apply(mat, 2, sum)
	row.sum <- apply(mat, 1, sum)
	exp.tab <- t(col.sum %*% t(row.sum)) / overall
	exp.tab}
	
simple.chisq <- function(mat){
	# very simple chisq test on a table
	obst <- mat ; expt <- exp.tab(mat)
	csv <- (obst - expt)^2/expt
	df <- prod(dim(mat)-1)
		test.stat <- sum(csv)
		p.val <- 1-pchisq(test.stat, df)
	out <- cbind(test.stat, df, p.val)
	list(obst, expt, csv, out)}


ROCx0 <- function(Disease, Diagnosis, cutp){ROC0(Disease, Diag(Diagnosis, cutp))}


ROCx1 <- function(Disease, Diagnosis, cutp){
	temp <- ROCx0(Disease, Diagnosis, cutp)
	temp2 <- c(temp[[2]][1,1], temp[[2]][1,2])
	c(cutp, temp2)}

ROC2 <- function(Disease, Diagnosis, cuts, cute, cutb){
	cutv <- seq(cuts, cute, by=cutb)
	lencutv <- length(cutv)
	ans<-matrix(0, ncol=3, nrow=lencutv)
		for(i in 1: lencutv){
			ans[i,] <- ROCx1(Disease, Diagnosis, cutv[i])}
	ansdf <- data.frame(ans)
	names(ansdf) <- c("Cut Point", "Sensitivity", "Specificity")
	ansdf}
	

ROC2b <- function(Disease, Diagnosis, cuts, cute, cutb){
		temp <- ROC2(Disease, Diagnosis, cuts, cute, cutb)
		temp[,3] <- 1-temp[,3]
		names(temp) <- c("Cut Point", "Sensitivity", "1-Specificity")
		temp}

ROC3 <- function(Disease, Diagnosis, cuts, cute, cutb){
	
	temp <- as.matrix(ROC2(Disease, Diagnosis, cuts, cute, cutb))
	Sensitivity <- temp[,2] ; OneMinusSpecif <- 1- temp[,3] ; Cutoff <- temp[,1]
	plot(OneMinusSpecif, Sensitivity, type="l", xlab="1-Specificity", xlim=c(0,1), ylim=c(0,1), main="ROC Curve") 
	spesen <- temp[,2] + temp[,3]
	lines(c(0,1), c(0,1), lty=1)
	cbind(Cutoff, Sensitivity, OneMinusSpecif, spesen)}


ROC3b <- function(Disease, Diagnosis, cuts, cute, cutb){
	
	temp <- as.matrix(ROC2b(Disease, Diagnosis, cuts, cute, cutb))
	Sensitivity <- temp[,2] ; OneMinusSpecif <- 1- temp[,3] ; Cutoff <- temp[,1]
	plot(OneMinusSpecif, Sensitivity, type="l", xlab="1-Specificity", xlim=c(0,1), ylim=c(0,1), main="ROC Curve") 
	spesen <- temp[,2] + temp[,3]
	lines(c(0,1), c(0,1), lty=1)
	cbind(Cutoff, Sensitivity, OneMinusSpecif, spesen)}
}


########################
### ROC CURVE REDONE ###
###  November  2006  ###
########################

roc0 <- function(disease,diag.numeric,cut.point,BigIsBad=T,EqualIsBad=T){
	# disease = T/F or 1/0 ; diag.numeric is numeric
	# if BigIsBad, then diagnosis=T when diag.numeric > cut.point
	# if EqualIsBad, then diagnosis=T when diag.numeric == cut.point
	
		if(length(unique(disease)) != 2){ stop(' disease should be 1/0 or TRUE/FALSE. ') }
	dis <- factor( as.logical(disease), levels=c(T,F) )
		fun1 <- function(x,y){x>y} ; fun2 <- function(x,y){x>=y}
		fun <- list(fun1,fun2)[[1+EqualIsBad]]
	if(!BigIsBad){ cut.point <- -cut.point ; diag.numeric <- -diag.numeric }
	dia <- factor( fun(diag.numeric,cut.point) , levels=c(T,F))
	SenSpe(table(dia,dis))
	}

set.seed(40)
disease <- sample(0:1,40,T)
diag.numeric <- (rnorm(40, 200, 17))
cut.point <- 190



roc0v <- function(disease, diag.numeric, BigIsBad=T, EqualIsBad=T, cut.pointV=NA){
	if(is.na(cut.pointV[1])){ cut.pointV <- seq(floor(min(diag.numeric)),ceiling(max(diag.numeric))) }
	L <- length(cut.pointV) ; if(BigIsBad){ cut.pointV <- rev(cut.pointV) }
	out <- matrix(0, ncol=3, nrow=L)
		for(i in 1:L){
			temp <- roc0(disease, diag.numeric, cut.pointV[i], BigIsBad, EqualIsBad)
			temp[,2] <- 1-temp[,2]
			out[i,] <- c(cut.pointV[i], temp[,1:2])}
	unik <- !duplicated( out[,2]*100 + out[,3] )
	dd <- data.frame(out[unik,]) ; names(dd) <- c('cut point', 'Sensitivity', '1-Specificity')
	
	x <- dd[,3] ; y <- dd[,2]
	area <- rep(0,ii<-nrow(dd)-1)
		for(i in 1:ii){area[i] <- (y[i]+y[i+1])*(x[i+1]-x[i])/2}
	auc <- sum(area)
	
	plot(x,y,xlim=c(0,1),ylim=c(0,1),las=1,xlab='1-Specificity',ylab='Sensitivity',type='b',pch=19)
	
	list(dat=dd,auc=auc)
	}

choose.best <- function(out){
	# out is output of roc0v
	out <- out[[1]]
	sens <- out[,2] ; spes <- 1-out[,3]
	dist <- round(sqrt((1-sens)^2+(1-spes)^2),4)
	min.loc <- rep('',length(sens)) ; min.loc[which(dist==min(dist))] <- '*'
	res <- data.frame(out, dist,min.loc)
	names(res) <- c('cut point','Sensitivity','1-Specificity','distance to (1,1)','min')
		
	res
}



notin <- function(long.v, short.v){
	# this gives elements of long.v that are not in short.v
	long.v[check.all(long.v, short.v)==0]
	}
	

newtable <- function(vec, st, en){
	# A better table.  
	# st and en define a range
	
	ans <- range(range(vec), st, en)
	x <- count <- seq(ans[1], ans[2])
	for(i in 1:length(x)){count[i] <- sum(vec==x[i])}
	rbind(x, count)
	}

new.table <- function(vec, rang){
	# A better table.
	# values in 'rang' will be forced to enter.

	x <- count <- sort(c(unique(vec), rang))
	for(i in 1:length(x)){count[i] <- sum(vec==x[i])}
	rbind(x, count)}



add.marginal <- function(tab){
	# This adds mariginal total to a table

	tot.x <- apply(tab, 1, sum)
	toty2<- apply(tab, 2, sum)
	grand <- sum(tab)
	tot.y <- c(toty2, grand)
	
	temp2 <- cbind(tab, tot.x)
	temp3 <- rbind(temp2, tot.y)

	temp3
}

nona <- function(v){v[!is.na(v)]}


av <- as.vector


MMQ <- function(v, g=NA, dig=3, All=T, simple=F){
	# Mean, Median, Quartiles of vector v by group g.  
	# g can be unspecified.  
	
	v.original <- v ; not.na <- !is.na(v.original)
	
	unspecified <- length(g) == 1

	if(unspecified){All <- F}
	g <- factor(g)
	f <- function(v, dig){format(v, nsmall=dig, digits=dig)}

	if (unspecified){ g <- factor(rep(0, length(v)), labels='all')}
	v <- v.original[not.na] ; g <- g[not.na]
	
	integ <- all(v==as.integer(v))

		temp <- function(v){
			o.mean <- mean(v)
			o.medi <- median(v)
			o.lowq <- av(quantile(v, .25))
			o.higq <- av(quantile(v, .75))
			o.min <- min(v)
			o.max <- max(v)
			o.sdsd <- sd(v)
			n <- length(v)
				
			if(!integ){
			out <- c(n, f(c(o.min, o.lowq, o.medi, o.higq, o.max, o.mean, o.sdsd), dig))}
			if( integ){
			out <- c(n, o.min, f(c(o.lowq, o.medi, o.higq), dig), o.max, f(c(o.mean, o.sdsd), dig))}
		out}

	hm <- length(unique(g))
	ans <- matrix(0, ncol=8, nrow = hm+1)

	for(i in 1:hm){
		ans[i,] <- temp(v[find.pos(g==levels(g)[i], T)])
	}
	ans[hm+1, ] <- temp(v)
	adf <- data.frame(ans, row.names=c(levels(g), 'All'))
	
		if(any(!not.na)){
			na.l <- sum(!not.na)
		adf <- data.frame(rbind(ans, c(na.l, rep(NA,7))))
		hm <- hm+1}
	
	names(adf) <- c('N', 'MIN', 'L.Q.', 'Median', 'U.Q.', 'MAX', 'Mean', 'SD')
	
	if(!All){adf <- adf[1:hm,]}
	if(simple){ adf <- adf[,c(1,4,3,5)]}
	adf
	}

group.sum <- gs <- function(v,g, r1=10){
	g <- factor(g) ; lg <- levels(g) ; ng <- length(lg)
	n <- g.mean <- g.med <- g.min <- g.max <- g.sd <-  g.25 <- g.75 <- rep(0,ng+1)

	r2 <- r1 ; if( is.integer(v) ){ r2 <- 0 }	
		rr1 <- function(x){round(x,r1)}
		rr2 <- function(x){round(x,r2)}		

		
	for(i in 1:ng){
		vg <- v[g==lg[i]]
		n[i] <- length(vg) ; g.mean[i] <- mean(vg) ; g.sd[i] <- sd(vg)
		g.med[i] <- median(vg) ; g.max[i] <- max(vg) ; g.min[i] <- min(vg)
		g.25[i] <- quantile(vg, .25) ; g.75[i] <- quantile(vg, .75)
	}
	n[ng+1] <- length(v) ; g.mean[ng+1] <- mean(v) ; g.sd[ng+1] <- sd(v)
	g.med[ng+1] <- median(v) ; g.max[ng+1] <- max(v) ; g.min[ng+1] <- min(v)
	g.25[ng+1] <- quantile(v, .25) ; g.75[ng+1] <- quantile(v, .75)
	
	
	
	out <-data.frame(n, rr2(g.min), rr1(g.25), rr1(g.med), rr1(g.75), rr2(g.max), rr1(g.mean), rr1(g.sd), 
		row.names=c(lg, 'All'))
	names(out) <- c('n', 'min', '1Q', 'med', '3Q', 'max', 'mean', 'sd')
	out
	}


SS <- function(v, g){
	# g is a factor
	lg <- levels(g) ; ng <- length(lg)

	y.. <- mean(v)
	yi. <- ni <- rep(0, ng)

	for(i in 1:ng){
		vi <- v[g==lg[i]]
		yi.[i] <- mean(vi) ; ni[i] <- length(vi)}
	
	SSTO <- sum((v - y..)^2)
	SSTR <- sum(ni * (yi. - y..)^2 )

	SSE <- SSTO - SSTR

	cbind(SSTO, SSTR, SSE)
}

lin.chisq <- function(mat){
	av <- as.vector ; rr <- function(x){round(x, 5)}
	rrav <- function(x){rr(av(x))}
	
	rn <- c('Pearson', 'Linear (Mantel-Haenszel)', 'Deviation from Linear')
	
	a <- chisq.test(table(mat[,1], mat[,2]))
	xsq1 <- rrav(a$statis) ; df1 <- av(a$pa) ; pv1 <- rrav(a$p.v)	
	
	r2 <- cor(mat[,1], mat[,2])^2 ; N <- dim(mat)[1]
	xsq2 <- rr((N-1)*r2) ; df2 <- 1 ; pv2 <- rr(1-pchisq(xsq2, df2))
	xsq3 <- xsq1 - xsq2 ; df3 <- df1 - df2 ; pv3 <- rr(1-pchisq(xsq3, df3))

	one <- c(xsq1, xsq2, xsq3)
	two <- c(df1,  df2, df3)
	thr <- c(pv1,  pv2, pv3)
	
	out <- data.frame(one, two, thr, row.names=rn)
	names(out) <- c('ChiSq', 'df', 'pvalue')
	out
	}


mf <- function(v1, v2){
	temp <- my.ft(v1, v2)
	return(list(temp[[2]], temp[[3]]))}



drop.fac.lev <- function(v){
	# drop factor levels that do not appear in 'v'
	as.factor(as.character(v))}	

"%w/o%" <- function(a,b)  a[! a %in% b]	 ## a without b

mystripchart <- function(formula, vertical=T, pch=16, method="jitter", lwd=1, ...) {

  stripchart(formula, vertical=vertical, pch=pch, method=method, ...)

  respvar.test <- t.test(formula)
  marg <- diff(respvar.test$conf.int)/4
  segments(0.9, respvar.test$est[1], 1.1, respvar.test$est[1], lwd=lwd)
  segments(1.9, respvar.test$est[2], 2.1, respvar.test$est[2], lwd=lwd)
  segments(1.15, respvar.test$est[1] - marg, 1.15, respvar.test$est[1] + marg, lwd=lwd)
  segments(1.85, respvar.test$est[2] - marg, 1.85, respvar.test$est[2] + marg, lwd=lwd)

}

ld <- function(){
	# list data.frame
	ll()[ll()$Class=='data.frame',]}

lf <- function(){
	# list data.frame
	ll()[ll()$Class=='function',]}




table1.num <- function(v, g){

	## g cannot have NA
	## v may
	
	require(exactRankTests)

	g <- factor(g)
	if(length(unique(g)) !=2) {stop('Only works with two factor levels.')}
	
	av <- as.vector
	
	v1m <- v[g == levels(g)[1]] ; v2m <- v[g == levels(g)[2]]
	m1 <- sum(is.na(v1m)) ; m2 <- sum(is.na(v2m)) ; ma <- m1+m2
	v1 <- v1m[!is.na(v1m)] ; v2 <- v2m[!is.na(v2m)]
	
	l1 <- length(v1) ; l2 <- length(v2) ; la <- length(c(v1,v2))
	s1 <- av(summary(v1)) ; s2 <- av(summary(v2)) ; sa <- av(summary(v))
		me1 <- mean(v1) ; me2 <- mean(v2) ; mea <- mean(c(v1,v2))
		sd1 <- sd(v1) ; sd2 <- sd(v2) ; sda <- sd(c(v1,v2))
	
	pval.w <- formatC(pw <- wilcox.exact(v~g)$p.v, format='f', dig=8)
	pval.t <- formatC(pt <- t.test(v~g)$p.v, format='f', dig=8)
	
	all1 <- round( c(l1, m1, s1[c(3, 2,5,1,6)], me1, sd1), 3)
	all2 <- round( c(l2, m2, s2[c(3, 2,5,1,6)], me2, sd2), 3)
	alla <- round( c(la, ma, sa[c(3, 2,5,1,6)], mea, sda), 3)
	
	out <- data.frame(rbind(all1, all2, alla), row.names=c(levels(g), 'ALL'))
	names(out) <- c('N', 'NA', 'Median', '1stQ', '3rdQ', 'Min', 'Max', 'Mean','SD')
	
	out2 <-  data.frame( cbind(c('Wilcoxon Exact','t'),c(pval.w,pval.t)) )
		names(out2) <- c('Test','P-value')
	list(out, out2)
	
	
}
	

show.colors <- function(x=22,y=30){
par(mfrow=c(1,1))
   par(mai=c(.4,.4,.4,.4), oma=c(.2,0,0,.2))

   plot(c(-1,x),c(-1,y), xlab='', ylab='', type='n', xaxt='n', yaxt='n', bty='n')
   for(i in 1:x){for(j in 1:y){
         k <- y*(i-1)+j ; co <- colors()[k]
         rect(i-1, j-1, i, j, col=co, border=1)}}

   text(rep(-.5,y),(1:y)-.5, 1:y, cex=1.2-.016*y)
   text((1:x)-.5, rep(-.5,x), y*(0:(x-1)), cex=1.2-.022*x)
   title('col=colors()[n]')
}



## a better group summary
set.seed(90) ; v <- rnorm(200, 200, 20) ; g <- factor(rep(LETTERS[1:5],c(50,40,30,60,20)), levels=LETTERS[1:10])

group.sum <- gs <- function(v,g, r1=10){
	g <- factor(g) ; lg <- levels(g) ; ng <- length(lg)
	n <- g.mean <- g.med <- g.min <- g.max <- g.sd <-  g.25 <- g.75 <- rep(0,ng+1)

	r2 <- r1 ; if( is.integer(v) ){ r2 <- 0 }	
		rr1 <- function(x){round(x,r1)}
		rr2 <- function(x){round(x,r2)}		

		
	for(i in 1:ng){
		vg <- v[g==lg[i]]
		n[i] <- length(vg) ; g.mean[i] <- mean(vg) ; g.sd[i] <- sd(vg)
		g.med[i] <- median(vg) ; g.max[i] <- max(vg) ; g.min[i] <- min(vg)
		g.25[i] <- quantile(vg, .25) ; g.75[i] <- quantile(vg, .75)
	}
	n[ng+1] <- length(v) ; g.mean[ng+1] <- mean(v) ; g.sd[ng+1] <- sd(v)
	g.med[ng+1] <- median(v) ; g.max[ng+1] <- max(v) ; g.min[ng+1] <- min(v)
	g.25[ng+1] <- quantile(v, .25) ; g.75[ng+1] <- quantile(v, .75)
	
	
	
	out <-data.frame(n, rr2(g.min), rr1(g.25), rr1(g.med), rr1(g.75), rr2(g.max), rr1(g.mean), rr1(g.sd), 
		row.names=c(lg, 'All'))
	names(out) <- c('n', 'min', '1Q', 'med', '3Q', 'max', 'mean', 'sd')
	out
	}



logrank <- function(group, event, time){                                
	# group = �Q�����ʂ���x�N�g���i1, 2 �̂����ꂩ�j                                       
	# event = ���S�Ȃ� 1�C�����Ȃ� 0 �̒l���Ƃ�x�N�g��
	# time = �������ԃx�N�g��

        OK <- complete.cases(group, event, time)                # �����l�����P�[�X������
        group <- group[OK]
        event <- event[OK]
        time <- time[OK]
        len <- length(group)
        stopifnot(length(event) == len, length(time) == len)
        tg <- table(c(time, rep(NA, 4)),                        # ����4���ڂ̓_�~�[
                    c(group, 1, 1, 2, 2)*10+c(event, 1, 0, 1, 0))
        k <- nrow(tg)
        nia <- table(group)[1]
        nib <- len-nia
        na <- c(nia, (rep(nia, k)-cumsum(tg[,1]+tg[,2]))[-k])
        nb <- c(nib, (rep(nib, k)-cumsum(tg[,3]+tg[,4]))[-k])
        da <- tg[,2]
        db <- tg[,4]
        dt <- da+db
        nt <- na+nb
        d <- dt/nt
        O <- c(sum(da), sum(db))
        ea <- na*d
        eb <- nb*d
        E <- c(sum(ea), sum(eb))

        result <- data.frame(da, db, dt, na, nb, nt, d, ea, eb)

        chi <- sum((O-E)^2/E)
        P <- pchisq(chi, 1, lower.tail=FALSE)
        result2 <- c("Chi-sq."=chi, "d.f."=1, "P value"=P)      # �x�i�ɂ�錟�����

        v <- sum(dt*(nt-dt)/(nt-1)*na/nt*(1-na/nt), na.rm=TRUE)
        chi3 <- (sum(da)-sum(na*d))^2/v
        P3 <- pchisq(chi3, 1, lower.tail=FALSE)
        result3 <- c("Chi-sq."=chi3, "d.f."=1, "P value"=P3)    # SAS ���Ɠ����������

        list(result=result, result2=result2, result3=result3)
}



my.km.plot <- function(km, col1=c(1,1), col2=c(1,1), lty1=c(1,2), lty2=c(1,2), lwd1=c(1,1), lwd2=c(1,1), ...){
	# km is output of survfit with type='kaplan-meier'
	# lty1 <- c(ltymain, ltyci) for group 1

		xf <- function(v){if(length(v)==1){rep(v,2)}else{v}}
			col1 <- xf(col1) ; col2 <- xf(col2) ; lty1 <- xf(lty1) ; lty2 <- xf(lty2) ; lwd1 <- xf(lwd1) ; lwd2 <- xf(lwd2)

		sk <- summary(km) ; st <- sk$time
			original.time <- km$time ; cuttp <- which(diff(original.time)<0)
				ot1 <- original.time[1:cuttp] ; ot2 <- original.time[(cuttp+1):length(original.time)]
		cutp <- which(diff(st)<0) ; gr1 <- 1:cutp ; gr2 <- (cutp+1):length(st)
	nr1 <- sk$n.risk[gr1] ; nr2 <- sk$n.risk[gr2]
	tim1 <- sk$time[gr1] ; tim2 <- sk$time[gr2]
	lci1 <- sk$lower[gr1] ; uci1 <- sk$upper[gr1] ; lci2 <- sk$lower[gr2] ; uci2 <- sk$upper[gr2]
			lv1 <- length(nr1) ; lv2 <- length(nr2)
		if(nr1[lv1]>1){lci1<-c(lci1,lci1[lv1]) ; uci1 <- c(uci1,uci1[lv1]) ; tim1 <- c(tim1,max(ot1))}
		if(nr2[lv2]>1){lci2<-c(lci2,lci2[lv2]) ; uci2 <- c(uci2,uci2[lv2]) ; tim2 <- c(tim2,max(ot2))}
	
	
	plot(km, conf=F, lty=c(lty1[1],lty2[1]), lwd=c(lwd1[1],lwd2[1]), col=c(col1[1],col2[1]), ...)
		lines(tim1,lci1, type='s', lty=lty1[2], lwd=lwd1[2], col=col1[2])
		lines(tim1,uci1, type='s', lty=lty1[2], lwd=lwd1[2], col=col1[2])
		lines(tim2,lci2, type='s', lty=lty2[2], lwd=lwd2[2], col=col2[2])
		lines(tim2,uci2, type='s', lty=lty2[2], lwd=lwd2[2], col=col2[2])
		
	list(cutp, lci1,uci1,tim1, lci2,uci2,tim2)	
	
	}



kappa.agreement <- ka <- function(tt){
	## tt is a 2x2 table
	
	aa <- tt[1,1] ; bb <- tt[2,1] ; cc <- tt[1,2] ; dd <- tt[2,2]
	n <- aa+bb+cc+dd
	i0 <- (aa+dd)/n ; ie <- ( (aa+cc)*(aa+bb)+(bb+dd)*(cc+dd) ) / n^2
	kappa <- (i0-ie)/(1-ie)
	
	data.frame(i0,ie,kappa)
	}


add1920 <- function(v){
	# v is a vector of dates in character mode in the form,
	# yy-mm-dd or yy/mm/dd
	
	yy <- as.numeric(substr(v,1,2))
	td <- 19 + (yy<50)*1
	
	out <- paste(td, v, sep='')
	whosna <- which(out=='NANA')
	
	out2 <- paste(substr(out,1,4),'-',substr(out,6,7),'-',substr(out,9,10),sep='')
	out2[whosna] <- NA
	out2	
	}

sub1920 <- function(v){
	# This does opposite of add1920
	# so that functions like date can be used
	
	substr(v,3,10)
	}

count.duplicated <- function(v){
	out <- v
	for(i in 1:length(v)){
		out[i] <- sum(v == v[i])}
	out
	}


add.leading0 <- function(v){
	nc <- nchar(v) ; ncm <- max(nc) ; ncd <- ncm-nc
	to.add <- rep(0,L <- length(nc))
	for(i in 1:L){
		to.add[i] <- paste(rep(0,ncd[i]+1),collapse='')
		}
	substr(paste(to.add,v,sep=''),2,ncm)
	}

##
within.group.id <- wgi <- function(v,g=NULL){

	one.group <- function(v){
		nl <- length(v)
		new <- which(diff(v)!=0)
		gs <- c(new[1],diff(c(new,nl)))
		rep(1:length(gs), gs)
	}
	
	out1 <- one.group(v)
	if(length(g)>0){
		out1 <- NULL
		g <- factor(g)
		n.group <- length(unique(g))
		for(i in 1:n.group){
			vv <- v[g==unique(g)[i]]
			out1 <- c(out1,one.group(vv))
			}
	}
	out2 <- NULL
		a <- c(which(diff(out1)!=0),length(v))
		b <- c(a[1],diff(a))
	for(i in 1:length(b)){out2 <- c(out2,1:b[i])}
	cbind(out1,out2)
	}



joint.marginal.plot <- jm.plot <- function(x,y, ...){
#	layout( rbind(c(4,2),c(3,1)), c(1,3), c(1,3))
	# main
#	par(mar=c(5,0,0,5))
	plot(x,y, yaxt='n', ...)
	axis(side=3, at=x, label=rep('',length(x)), tick=T, ...)
	axis(side=4, at=axTicks(2), ...)
	axis(side=2, at=y, label=rep('', length(y)), tick=T, ...)
	}


rm.all.na <- function(m){
		empty.is.na <- function(x){ replace(x,x=='',NA) }
		na.or.empty <- function(x){ (is.na(x) | x=='') }
	nna.col <- apply(m,2,function(x) any(!na.or.empty(x)))
	nna.row <- apply(m,1,function(x) any(!na.or.empty(x)))
	empty.is.na(m)[nna.row,nna.col]
	}

factors.in.data.frame <- function(d, col.id){
	nc <- 1:length(col.id)
	for(i in nc){d[,col.id[i]] <- factor(factor(d[,col.id[i]]))}
	d
	}
dates.in.data.frame <- function(d, col.id, fmt){
	nc <- 1:length(col.id)
	for(i in nc){d[,col.id[i]] <- as.Date(as.character(d[,col.id[i]]), format=fmt)}
	d
	}
	
numeric.in.data.frame <- function(d, col.id){
	nc <- 1:length(col.id)
	for(i in nc){d[,col.id[i]] <-
	suppressWarnings( as.numeric(as.character(d[,col.id[i]])))
	}
	d
	}