*MACRO_6-22-10; /******************* Pre-processing *******************/ %macro summarydata_preprocess (dat, outdat); * compute chisq; data b1; set &dat; keep snpid controls_AA controls_AB controls_BB cases_AA cases_AB cases_BB; run; %sort (b1, snpid); data b1; set b1; order = _n_; run; proc transpose data=b1 out=c1; by order snpid; run; data d1; set c1; grp = scan(_name_, 1, "_"); genotype = scan(_name_, 2, "_"); if genotype = "AA" then genoc = 0; if genotype = "AB" then genoc = 1; if genotype = "BB" then genoc = 2; genoc2 = 2 - genoc; run; ods exclude all; ods noresults; proc logistic data=d1; by snpid; class grp; weight col1; model grp = genoc2; ods output GlobalTests=chisq(where=(Test="Score")); run; data chisq2; set chisq; keep snpid chisq; run; * gene-snp info; data gene_snp; set &dat; keep chromosome genename snpid location; run; %sort (gene_snp nodup, genename snpid); * count # of snps in each gene; proc freq data=gene_snp; tables genename; ods output OneWayFreqs=gene_count; run; data gene_count; set gene_count; gene_nsnps = frequency; keep genename gene_nsnps; run; * count # of genes in each snp; proc freq data=gene_snp; tables snpid; ods output OneWayFreqs=snp_count; run; data snp_count; set snp_count; snp_ngenes = frequency; keep snpid snp_ngenes; run; ods exclude none; ods results; * gene-snp + chisq ; %sort (gene_snp, snpid); %sort (chisq2, snpid); %sort(snp_count, snpid); data gene_snp_chisq; merge gene_snp chisq2 snp_count; by snpid; keep genename snpid chisq snp_ngenes location chromosome; run; * gene-snp + chisq + nsnps; %sort (gene_count, genename); %sort(gene_snp_chisq, genename); data &outdat; format datset $50.; datset ="&dat"; merge gene_snp_chisq gene_count; by genename; run; %mend; /**************** spatial model ****************/ %macro modc_sp_summary (dat, input, out_genes, outstatus); data gene_snp; set &input; keep chromosome genename snpid location; run; %sort (gene_snp nodup, genename snpid); * make indicator vars for each gene; data c; set &input; keep snpid genename ; run; %sort (c nodup, snpid genename); data d; set c; large_gene=1; run; %sort(d, snpid); proc transpose data=d out=e(drop=_name_) ; by snpid; id genename; var large_gene; run; proc contents data=e out=out noprint; run; %sort (out, name); data _null_; set out(where=(name ne "snpid")) end=eof; call symput ("genename"||compress(_n_), compress(name)); if eof then call symput ('ngenes',compress(_n_)); run; * take out duplicates-snps on multiple genes; data noduplicates; set &input; drop genename gene_nsnps; run; proc sort data=noduplicates nodup; by snpid; run; * calculate gene indicator vars; %sort (e, snpid); %sort(noduplicates, snpid); data f; merge e noduplicates; by snpid; %do i=1 %to &ngenes; if &&genename&i = . then &&genename&i=0; %end; locs = location/1e7; run; %sort (f, chromosome snpid); * compute design matrix for each gene; data one; set f; keep snpid chromosome; run; %sort (one nodup, snpid); %sort (gene_snp, snpid); data two; merge one gene_snp; by snpid; ind=1; keep genename chromosome ind; run; %sort (two nodup, genename); proc glmmod data=two outparm=parm outdesign=design; class chromosome; model ind = chromosome/noint; run; data _null_; set parm nobs=nk; call symput ('nchrom',nk); run; data design2; set design; desg = catx(" ", of col1-col%eval(1*&nchrom)); run; data _null_; set design2 end=eof; call symput ("gene"||compress(_n_), desg); run; * fit mixed model; proc glimmix data=f maxopt=100; class chromosome; model chisq = /s dist=gamma link=log; random %do g=1 %to &ngenes; &&genename&g %end; /type=sp(exp) (locs) gcoord=mean subject=chromosome; *overall; estimate "all genes " int 1/upper; * first gene; estimate "&genename1" |&genename1 1 /subject &gene1 ; %do k=2 %to %eval(&ngenes); estimate "&&genename&k" | &&genename&k 1/ subject &&gene&k; %end; nloptions tech=nrridg; ods output ConvergenceStatus=status_mod2d; ods output estimates=est_mod2d; run; data _null_; set status_mod2d; call symput ("status", compress(status)); run; data &outstatus; format datset $50.; set status_mod2d; datset = "&dat"; mod="spatial"; run; %if &status = 0 %then %do; *converged; data &out_genes; format datset $50.; set est_mod2d; mod="spatial"; datset="&dat"; run; %end; %else %do; *did not converge; data &out_genes; run; %end; title; %mend; %macro sort (dat, by); proc sort data=&dat; by &by; run; %mend; /******************** * Main Program * *********************/ options mprint nodate nocenter formdlim=' '; %let dir = C:\; libname h "&dir"; %summarydata_preprocess(dat=h.geneset, outdat=geneset); %modc_sp_summary (dat=geneset, input=geneset, out_genes=one_mod2d, outstatus=one_status_mod2d);