rm(list=ls()) ## first example ## ##header functions## ##---------------------------------------------------------------------------- # function g(x,gep,geb2,geb3,gk): target function in uniroot() to solve alpha ##---------------------------------------------------------------------------- # x: independent variable # gep: population prob(A) # geb2: beta2, coef for I(Aa) # geb3: beta3, coef for I(aa) # gk: population prevalence rate k ##------------------------------------------------------------- g<-function(x,gep,geb2,geb3,gk){ gep*gep*x + 2*gep*(1-gep)*x*exp(geb2)/(1-x+x*exp(geb2)) + (1-gep)*(1-gep)*x*exp(geb3)/(1-x+x*exp(geb3)) - gk } ##---------------------------------------------------------------------------- # function Makedata(p,b2,b3,k,nt,nc): data simulation ##---------------------------------------------------------------------------- # p: population prob(A) # b2: beta2, coef for I(Aa) # b3: beta3, coef for I(aa) # k: population prevalence rate k # nt: number of cases # nc: number of controls ##---------------------------------------------------------------------------- Makedata<-function(p,b2,b3,k,nt,nc) { #baseline risk: calculated through function g() alpha<-uniroot(g,lower=0,upper=1,tol=1e-10, gep=p,geb2=b2,geb3=b3,gk=k)$root #population genotype frquencies q<-1-p pAA<-p^2 pAa<-2*p*q paa<-q^2 #derive the genotype proportions in case/control aa1<-paa*alpha*exp(b3)/(1-alpha+alpha*exp(b3))/k Aa1<-pAa*alpha*exp(b2)/(1-alpha+alpha*exp(b2))/k AA1<-pAA*alpha/k aa0<-paa*(1-alpha)/(1-alpha+alpha*exp(b3))/(1-k) Aa0<-pAa*(1-alpha)/(1-alpha+alpha*exp(b2))/(1-k) AA0<-pAA*(1-alpha)/(1-k) #simulate the disease outcomes and genotypes n<-nt+nc outcome<-c(rep(1,nt),rep(0,nc)) genotype<-rep(NA,n) for (i in 1:n) { if (i<=nt) { x<-runif(1) genotype[i]<-ifelse(x<=AA1,1, ifelse(x>AA1+Aa1,3,2)) } else { x<-runif(1) genotype[i]<-ifelse(x<=AA0,1, ifelse(x>AA0+Aa0,3,2)) } } genotype<-as.factor(genotype) levels(genotype)<-c("AA","Aa","aa") dat<-data.frame(seq(1,n),outcome,genotype) names(dat)<-c("id","outcome","genotype") return(dat) } ## result ## dat<-Makedata(p=0.6, b2=0.5, b3=2, k=0.05, nc=100, nt=100) dat with(dat,table(genotype,outcome)) ## second example ## # suppose you have a small dataset p=0.6 b2=0.5 b3=2 k=0.05 nc=nt=10 dat<-Makedata(p, b2, b3, k, nc,nt) with(dat,table(genotype,outcome)) # expand the control group s.t.: (n.1):N=1:k case<-subset(dat, outcome==1) ctrl<-subset(dat, outcome==0) new_ctrl<-dat[sample(ctrl$id, nt*(1/k-1),replace=TRUE),] # new data new_dat<-rbind(case,new_ctrl) #in comparison with(dat,table(genotype,outcome)) with(new_dat,table(genotype,outcome))