### Clearing all of the data so that I am starting with a clean set of data. rm(list=ls()) ### Setting my working directory to where I want to save results setwd("~/Dropbox/vigh/d43-nigeria/workshops/workshop2023/analyses") ### Reading in the AKTH dataset d<-read.csv("Nigeria-R3-Data-Simple.csv") ### Based on discussions with Muktar, we determined that the outlier potassium value of 65 should be changed ### to 6.5, and the outlier sodium value of 41 should be changed to 141. d$potassium<-ifelse(d$potassium==65, 6.5, d$potassium) d$sodium<-ifelse(d$sodium==41, 141, d$sodium) head(d) ######### Let's do some basic descriptives of the data with(d, summary(age)) with(d, hist(age)) mean(d$male) with(d, summary(bmi)) with(d, hist(bmi)) mean(d$smoke) with(d, summary(cd4)) with(d, hist(cd4)) mean(d$tdf) mean(d$dtg) table(d$risk.alleles) mean(d$htn) d$jnc_bp<-factor(d$jnc_bp) table(d$jnc_bp) summary(d$years.ART) summary(d$uACR) with(d, hist(uACR)) with(d, boxplot(uACR)) d$log10uACR<-log10(d$uACR) with(d, hist(log10uACR)) with(d, boxplot(log10uACR)) summary(d$eGFR) with(d, hist(eGFR)) summary(d$potassium) with(d, hist(potassium)) ### We should have done a boxplot to catch the oulier with(d, boxplot(potassium)) summary(d$sodium) with(d, hist(sodium)) summary(d$bicarbonate) with(d, hist(bicarbonate)) summary(d$chloride) with(d, hist(chloride)) summary(d$urea) with(d, hist(urea)) summary(d$SBP) with(d, hist(SBP)) summary(d$DBP) with(d, hist(DBP)) hist(d$bmi) summary(d$bmi) boxplot(d$bmi) head(d) summary(d$cd4) mean(d$tdf) table(d$male,d$smoke) plot(d$age,d$bmi) cor(d$age,d$bmi) boxplot(d$bmi ~ d$male) sum(is.na(d$cd4)) for (i in names(d)){ print(paste(i,sum(is.na(d[i])))) } mean(d$age) mean(d$cd4,na.rm=TRUE) summary(d$cd4) quantile(d$cd4, c(.10,.25,.99), na.rm=TRUE) quantile(d$age, c(.10,.25,.99)) hist(d$cd4) hist(log(d$uACR)) with(d, plot(log(uACR),eGFR)) with(d, cor(log(uACR),eGFR)) mean(d$htn) ##### Univariate Associations with htn (self-reported hypertension) #### Logistic regression model looking at association between age and hypertension ## We obtain the model coefficients and p-values, and then we compute odds ratios ## and 95% confidence intervals for the odds ratios. mod.age<-glm(htn ~ age, data=d, family=binomial) summary(mod.age) exp(mod.age$coefficients) exp(confint.default(mod.age)) #### Logistic regression model looking at the association between cd4 and hypertension mod.cd4<-glm(htn ~ cd4, data=d, family=binomial) summary(mod.cd4) exp(mod.cd4$coefficients*100) exp(confint.default(mod.cd4)*100) mod.dm<-glm(htn ~ DM, data=d, family=binomial) summary(mod.dm) mod.tdf<-glm(htn ~ tdf, data=d, family=binomial) summary(mod.tdf) mod.ra<-glm(htn ~ risk.alleles, data=d, family=binomial) summary(mod.ra) exp(mod.ra$coefficients) exp(confint.default(mod.ra)) mod.raf<-glm(htn ~ factor(risk.alleles), data=d, family=binomial) summary(mod.raf) exp(mod.raf$coefficients) exp(confint.default(mod.raf)) d$hr.risk.alleles<-ifelse(d$risk.alleles==2,1,0) table(d$hr.risk.alleles,d$risk.alleles) mod.hra<-glm(htn ~ hr.risk.alleles, data=d, family=binomial) summary(mod.hra) exp(mod.hra$coefficients) exp(confint.default(mod.hra)) #### Chisquare test looking at association between sex and hypertension ## The first uses a continuity correction, the second does not. ## We compare this with logistic regression, which results in a p-value which is ## the same as the one that does not use the continuity correction. with(d, chisq.test(htn,male)) with(d, chisq.test(htn,male, correct=FALSE)) mod.male<-glm(htn~male, data=d, family=binomial) summary(mod.male) #### I created this function that takes the fitted mmodel and extracts the odds ratio, ## 95% confidence interval for the odds ratio, and the p-value. It also allows me to ## quickly multiply the coefficient by a number, "multiplier", in case I want to ## interpret an association on a different scale (e.g., instead of a 1-unit increase ## in CD4, maybe a 100-unit increase in CD4.) or.function<-function(coeff.info, multiplier=1){ or<-exp(coeff.info[1]*multiplier) lower<-exp((coeff.info[1]-1.96*coeff.info[2])*multiplier) upper<-exp((coeff.info[1]+1.96*coeff.info[2])*multiplier) pval<-coeff.info[4] names(or)<-"Odds Ratio" names(lower)<-"95% Lower CI" names(upper)<-"95% Upper CI" names(pval)<-"P-Value" print(c(or, lower, upper, pval)) } or.function(summary(mod.male)$coefficients[2,]) ### Now I am fitting logistic regression using the lrm function of the rms package. ## Everything is the same, but the functions are slightly different. library(rms) dd<-datadist(d) ## This line and the next are used to help with summarizing lrm(.) results options(datadist=dd) fit.male<-lrm(htn ~ male, data=d) summary(fit.male) ## This gives estimates for contrasts anova(fit.male) ## This computes p-values for the model mod.age<-glm(htn~age, data=d, family=binomial) summary(mod.age) or.function(summary(mod.age)$coefficients[2,]) mod.bmi<-glm(htn~bmi, data=d, family=binomial) summary(mod.bmi) or.function(summary(mod.bmi)$coefficients[2,]) mod.smoke<-glm(htn~smoke, data=d, family=binomial) summary(mod.smoke) or.function(summary(mod.smoke)$coefficients[2,]) mod.cd4<-glm(htn~cd4, data=d, family=binomial) summary(mod.cd4) or.function(summary(mod.cd4)$coefficients[2,], multiplier=100) mod.tdf<-glm(htn~tdf, data=d, family=binomial) summary(mod.tdf) or.function(summary(mod.tdf)$coefficients[2,]) mod.dtg<-glm(htn~dtg, data=d, family=binomial) summary(mod.dtg) or.function(summary(mod.dtg)$coefficients[2,]) ### jnc_bp is a categorical variable, so there are multiple odds ratios comparing ## to the reference category mod.jnc<-glm(htn~jnc_bp, data=d, family=binomial) summary(mod.jnc) or.function(summary(mod.jnc)$coefficients[2,]) or.function(summary(mod.jnc)$coefficients[3,]) or.function(summary(mod.jnc)$coefficients[4,]) mod.art<-glm(htn~years.ART, data=d, family=binomial) summary(mod.art) or.function(summary(mod.art)$coefficients[2,]) mod.uacr<-glm(htn~uACR, data=d, family=binomial) summary(mod.uacr) or.function(summary(mod.uacr)$coefficients[2,]) ### Analyzing log-transformed uACR, because it is so skewed. mod.luacr<-glm(htn~log10uACR, data=d, family=binomial) summary(mod.luacr) or.function(summary(mod.luacr)$coefficients[2,]) ############################################################################# ##### Day 2 ############################################################################# ####### Now let's do multivariable logistic regression analysis length(d$htn) # number of records in the database sum(d$htn) # number of peopel with hypertension ## Clinically relevant variable from Muktar: ## age, male, bmi, smoke, eGFR, potassium, sodium, DM ## maybe an interaction between male and age ## Let's also see if CD4 is a good predictor ## Logistic regression model with all predictor variables suggested by Muktar mod1<-glm(htn ~ age + male + bmi + smoke + eGFR + potassium + sodium + DM, data=d, family=binomial) summary(mod1) ## DM was not a good predictor of hypertension, despite it being a good predictor ## in the univariate analyses. There were very few that had DM. sum(d$DM) exp(mod1$coefficients) mod.htn<-glm(htn ~ DM, data=d, family=binomial) summary(mod.htn) exp(mod.htn$coeff) exp(confint.default(mod.htn)) #### Examining interaction between sex and age. Interaction was not statistically ## significant, and interpretation is more difficult with an interaction term, ## so from this point forward we will not include an interaction. mod.inter<-glm(htn ~ age * male + bmi + smoke + eGFR + potassium + sodium + DM, data=d, family=binomial) summary(mod.inter) exp(mod.inter$coeff) #### Checking to see if CD4 is a good predictor. It is. Notice that 162 records were ## not included in this analysis because they were missing CD4 count. mod.cd4<-glm(htn ~ age + male + bmi + smoke + eGFR + potassium + sodium + DM + cd4, data=d, family=binomial) summary(mod.cd4) exp(mod.cd4$coeff) #### I am looking at the distribution of continuous variables again. If they are highly ## skewed, I might want to transform them first. hist(d$age) hist(d$bmi) hist(d$eGFR) hist(d$potassium) summary(d$potassium) hist(d$sodium) summary(d$sodium) hist(d$cd4) hist(sqrt(d$cd4)) #### Fitting mod.cd4 again except with potassium log transformed mod2<-glm(htn ~ age + male + bmi + smoke + eGFR + I(log(potassium)) + sodium + DM + cd4, data=d, family=binomial) summary(mod2) #### I am expanding all continuous variables using natural splines with 3 df each to avoid ## assuming linearity. mod.nonlin<-glm(htn ~ ns(age, df=3) + male + ns(bmi, df=3) + smoke + ns(eGFR, df=3) + ns(potassium,df=3)+ ns(sodium, df=3) + DM + ns(cd4, df=3), data=d, family=binomial) mod.nonlin #### This does a test for non-linearity by comparing the model with all of the non-linear ## terms with the model with everything assuming linearity. I need to calculate the ## p-value myself by comparing it to a chisquare distribution with 12 degrees of freedom anova(mod.cd4, mod.nonlin) 1-pchisq(anova(mod.cd4, mod.nonlin)$Deviance, df=anova(mod.cd4, mod.nonlin)$Df) ############################################################################# ##### Day 3 ############################################################################# ### Now repeating mod2 using lrm() to fit logistic regression instead of glm() dd<-datadist(d) options(datadist="dd") fit2<-lrm(htn ~ age + male + bmi + smoke + eGFR + potassium + sodium + DM + cd4, data=d, x=TRUE, y=TRUE) ### I get the same beta coefficient estimates with both approaches fit2$coefficients mod.cd4$coefficients ### Now fitting a nonlinear model with 3 df (4 parms) allocated to each continuous variable fit.nonlin<-lrm(htn ~ rcs(age,parms=4) + male + rcs(bmi,parms=4) + smoke + rcs(eGFR,parms=4) + rcs(potassium,parms=4) + rcs(sodium,parms=4) + DM + rcs(cd4,parms=4), data=d, x=TRUE, y=TRUE) fit.nonlin ### This allows us to see tests for non-linearity: anova(fit.nonlin) ### Because the non-linear terms for BMI, sodium, and CD4 were far from statistically significant, I remove them ### and refit the model: fit.nonlin1<-lrm(htn ~ rcs(age,parms=4) + male + bmi + smoke + rcs(eGFR,parms=4) + rcs(potassium,parms=4) + sodium + DM + cd4, data=d, x=TRUE, y=TRUE) anova(fit.nonlin1) ### Let's try it again, but only using 2 df for potassium instead of 3: fit.nonlin2<-lrm(htn ~ rcs(age,parms=4) + male + bmi + smoke + rcs(eGFR,parms=4) + rcs(potassium,parms=3) + sodium + DM + cd4, data=d, x=TRUE, y=TRUE) anova(fit.nonlin2) ### Let's try it again using 2 df for all remaining variables with non-linear terms instead of 3: fit.nonlin3<-lrm(htn ~ rcs(age,parms=3) + male + bmi + smoke + rcs(eGFR,parms=3) + rcs(potassium,parms=3) + sodium + DM + cd4, data=d, x=TRUE, y=TRUE) anova(fit.nonlin3) #### Comparing the AIC between models. fit.nonlin2 appears to be the best (lowest AIC). AIC(fit.nonlin) AIC(fit.nonlin1) AIC(fit.nonlin2) AIC(fit.nonlin3) fit.nonlin2 ### The summary(.) function when applied to a model fit using lrm() shows contrasts -- i.e., odds ratios comparing ### specific values. Default is to compare the 75th and 25th percentiles of continuous variables. We can change ### which valuees of variables we want to compare by specifying the covariate and the values. summary(fit.nonlin2) anova(fit.nonlin2) round(summary(fit.nonlin2, age=c(40,20), bmi=c(20, 25), eGFR=c(100, 25), potassium=c(4, 3), sodium=c(140, 150), cd4=c(200,300)),2) round(summary(fit.nonlin2, age=c(40,30), eGFR=c(100, 50), potassium=c(4, 5)),2) round(summary(fit.nonlin2, age=c(40,50), eGFR=c(100, 75), potassium=c(4, 6)),2) round(summary(fit.nonlin2, age=c(40,60), eGFR=c(100, 125)),2) round(summary(fit.nonlin2, eGFR=c(100, 150)),2) round(summary(fit.nonlin2, eGFR=c(100, 175)),2) ###### This is the start of a table that one might want to create to present the results in an interpretable ###### manner. ######################################## #### OR 95% CI p-value #### Age <0.0001 #### 20 vs. 40 0.230880 0.056849 0.937630 #### 30 vs. 40 #### 40 vs. 40 #### 50 vs. 40 2.635300 2.000600 3.471300 #### 60 vs. 40 #### CD4 (per 100 cells) 1.065900 1.017200 1.116800 ### Interaction between age and sex in our best fitting model fit.nonlin2.inter<-lrm(htn ~ rcs(age,parms=4)*male + bmi + smoke + rcs(eGFR,parms=4) + rcs(potassium,parms=3) + sodium + DM + cd4, data=d, x=TRUE, y=TRUE) anova(fit.nonlin2.inter) ###### Plotting Odds ratios as a function of age ages<-c(min(d$age):max(d$age)) #### Creating a variable that includes all ages between the youngest and oldest person ests<-matrix(NA,length(ages),3) for(i in 1:length(ages)){ ests[i,]<-summary(fit.nonlin2, age=c(40, ages[i]))[2,c(4,6,7)] } plot(rep(ages,3),log(c(ests[,1],ests[,2],ests[,3])),xlab="Age", ylab="Odds Ratio", axes=FALSE, type="n") abline(h=0,lwd=1,col=gray(.8)); axis(1); box() axis(2, at=log(c(5, 2, 1, .5, .2)), labels=c("5", "2", "1", "0.5", "0.2")) lines(ages,log(ests[,1])) lines(ages,log(ests[,2]), lty=2) lines(ages,log(ests[,3]), lty=2) ###### Plotting Odds ratios as a function of eGFR eGFRs<-c(round(min(d$eGFR)):round(max(d$eGFR))) ests<-matrix(NA,length(eGFRs),3) for(i in 1:length(eGFRs)){ ests[i,]<-summary(fit.nonlin2, eGFR=c(100, eGFRs[i]))[10,c(4,6,7)] } plot(rep(eGFRs,3),log(c(ests[,1],ests[,2],ests[,3])),xlab="eGFR", ylab="Odds Ratio", axes=FALSE, type="n") abline(h=0,lwd=1,col=gray(.8)); axis(1); box() axis(2, at=log(c(5, 2, 1, .5, .2)), labels=c("5", "2", "1", "0.5", "0.2")) lines(eGFRs,log(ests[,1])) lines(eGFRs,log(ests[,2]), lty=2) lines(eGFRs,log(ests[,3]), lty=2) ###### Plotting Odds ratios as a function of potassium potassiums<-c((round(min(d$potassium))*10):70)/10 ests<-matrix(NA,length(potassiums),3) for(i in 1:length(potassiums)){ ests[i,]<-summary(fit.nonlin2, potassium=c(4, potassiums[i]))[12,c(4,6,7)] } plot(rep(potassiums,3),log(c(ests[,1],ests[,2],ests[,3])),xlab="Potassium", ylab="Odds Ratio", axes=FALSE, type="n") abline(h=0,lwd=1,col=gray(.8)); axis(1); box() axis(2, at=log(c(5, 2, 1, .5, .2)), labels=c("5", "2", "1", "0.5", "0.2")) lines(potassiums,log(ests[,1])) lines(potassiums,log(ests[,2]), lty=2) lines(potassiums,log(ests[,3]), lty=2) ############################################################################# ##### Day 4 ############################################################################# ### Now we are going to analyze the ordinal variable JNC BP table(d$jnc_bp) table(d$jnc_bp)/length(d$jnc_bp) ### This is required for using the lrm(.) package for ordinal logistic regression. library(rms) dd<-datadist(d) options(datadist="dd") ### Fitting ordinal logistic regression using the best fitting model for logistic regression. fit.ord<-lrm(jnc_bp ~ rcs(age,parms=4) + male + bmi + smoke + rcs(eGFR,parms=4) + rcs(potassium,parms=3) + sodium + DM + cd4, data=d, x=TRUE, y=TRUE) anova(fit.ord) ### Let's try it again, but without CD4 fit.ord.no.cd4<-lrm(jnc_bp ~ rcs(age,parms=4) + male + bmi + smoke + rcs(eGFR,parms=4) + rcs(I(log(potassium)),parms=3) + sodium + DM, data=d, x=TRUE, y=TRUE) anova(fit.ord.no.cd4) #### This investigates the proportional odds assumption. There is evidence that the PO assumption is violated. ### We did not make any changes to the model, however. impactPO(jnc_bp ~ age + male + bmi + smoke + eGFR + potassium + sodium + DM, data=d, newdata=d[1,]) ### Obtaining odds ratios comparing different levels of the ordinal models anova(fit.ord) round(summary(fit.ord, age=c(40,20), bmi=c(20, 25), eGFR=c(100, 25), potassium=c(4, 3), sodium=c(140, 150), cd4=c(200,300)),2) round(summary(fit.ord, age=c(40,30), eGFR=c(100, 50), potassium=c(4, 5)),2) round(summary(fit.ord, age=c(40,50), eGFR=c(100, 75), potassium=c(4, 6)),2) round(summary(fit.ord, age=c(40,60), eGFR=c(100, 125)),2) round(summary(fit.ord, eGFR=c(100, 150)),2) round(summary(fit.ord, eGFR=c(100, 175)),2) #### Plotting odds ratios as a function of age ages<-c(min(d$age):max(d$age)) #### Creating a variable that includes all ages between the youngest and oldest person ests<-matrix(NA,length(ages),3) for(i in 1:length(ages)){ ests[i,]<-summary(fit.ord, age=c(40, ages[i]))[2,c(4,6,7)] } plot(rep(ages,3),log(c(ests[,1],ests[,2],ests[,3])),xlab="Age", ylab="Odds Ratio", axes=FALSE, type="n") abline(h=0,lwd=1,col=gray(.8)); axis(1); box() axis(2, at=log(c(1.5, 1, .5, .33, .2, .1)), labels=c("1.5", "1", "0.5", "0.33", "0.2", "0.1")) lines(ages,log(ests[,1])) lines(ages,log(ests[,2]), lty=2) lines(ages,log(ests[,3]), lty=2) eGFRs<-c(round(min(d$eGFR)):round(max(d$eGFR))) ests<-matrix(NA,length(eGFRs),3) for(i in 1:length(eGFRs)){ ests[i,]<-summary(fit.ord, eGFR=c(100, eGFRs[i]))[10,c(4,6,7)] } plot(rep(eGFRs,3),log(c(ests[,1],ests[,2],ests[,3])),xlab="eGFR", ylab="Odds Ratio", axes=FALSE, type="n") abline(h=0,lwd=1,col=gray(.8)); axis(1); box() axis(2, at=log(c(1.5, 1, .5, .33, .2, .1)), labels=c("1.5", "1", "0.5", "0.33", "0.2", "0.1")) lines(eGFRs,log(ests[,1])) lines(eGFRs,log(ests[,2]), lty=2) lines(eGFRs,log(ests[,3]), lty=2) ###### Plotting Odds ratios as a function of potassium potassiums<-c((round(min(d$potassium))*10):70)/10 ests<-matrix(NA,length(potassiums),3) for(i in 1:length(potassiums)){ ests[i,]<-summary(fit.ord, potassium=c(4, potassiums[i]))[12,c(4,6,7)] } plot(rep(potassiums,3),log(c(ests[,1],ests[,2],ests[,3])),xlab="Potassium", ylab="Odds Ratio", axes=FALSE, type="n") abline(h=0,lwd=1,col=gray(.8)); axis(1); box() axis(2, at=log(c(5, 2, 1, .5, .2)), labels=c("5", "2", "1", "0.5", "0.2")) lines(potassiums,log(ests[,1])) lines(potassiums,log(ests[,2]), lty=2) lines(potassiums,log(ests[,3]), lty=2) ################################################################### ##### Cumulative probability models ##### Analyzing continuous variable with ordinal regression ################################################################### ##### The rms package is also required for these analyses library(rms) dd<-datadist(d) options(datadist="dd") #### Diastoloic blood pressure (DBP) looks fairly normally distributed, so it's probably OK to do linear regression. hist(d$DBP) summary(d$DBP) #### The function ols(.) in the rms package fits linear regression models, and it is easy to obtain info #### from these models. fit.ols<-ols(DBP ~ rcs(age,parms=4) + male + bmi + smoke + rcs(eGFR,parms=4) + rcs(potassium,parms=3) + sodium + DM + cd4, data=d, x=TRUE, y=TRUE) #### Repeating using lm(.) package that is more commonly used. We get the same model coefficients using lm vs. ols. fit.lm<-lm(DBP ~ rcs(age,parms=4) + male + bmi + smoke + rcs(eGFR,parms=4) + rcs(potassium,parms=3) + sodium + DM + cd4, data=d) ### Likelihood ratio tests to examine the p-values for variables' associations with DBP and non-linearity anova(fit.ols) ### Now we are summarizing and contrasting the expected DBP based on various levels of covariates from our fitted model round(summary(fit.ols, age=c(40,20), bmi=c(20, 25), eGFR=c(100, 25), potassium=c(4, 3), sodium=c(140, 150), cd4=c(200,300)),2) round(summary(fit.ols, age=c(40,30), eGFR=c(100, 50), potassium=c(4, 5)),2) round(summary(fit.ols, age=c(40,50), eGFR=c(100, 75), potassium=c(4, 6)),2) round(summary(fit.ols, age=c(40,60), eGFR=c(100, 125)),2) round(summary(fit.ols, eGFR=c(100, 150)),2) round(summary(fit.ols, eGFR=c(100, 175)),2) #### Plotting odds ratios as a function of age ages<-c(min(d$age):max(d$age)) #### Creating a variable that includes all ages between the youngest and oldest person ests<-matrix(NA,length(ages),3) for(i in 1:length(ages)){ ests[i,]<-summary(fit.ols, age=c(40, ages[i]))[1,c(4,6,7)] } plot(rep(ages,3),c(ests[,1],ests[,2],ests[,3]),xlab="Age", ylab="Effect Size", axes=FALSE, type="n") abline(h=0,lwd=1,col=gray(.8)); axis(1); axis(2); box() lines(ages,ests[,1]) lines(ages,ests[,2], lty=2) lines(ages,ests[,3], lty=2) eGFRs<-c(round(min(d$eGFR)):round(max(d$eGFR))) ests<-matrix(NA,length(eGFRs),3) for(i in 1:length(eGFRs)){ ests[i,]<-summary(fit.ols, eGFR=c(100, eGFRs[i]))[5,c(4,6,7)] } plot(rep(eGFRs,3),c(ests[,1],ests[,2],ests[,3]),xlab="eGFR", ylab="Effect Size", type="n") abline(h=0,lwd=1,col=gray(.8)); lines(eGFRs,ests[,1]) lines(eGFRs,ests[,2], lty=2) lines(eGFRs,ests[,3], lty=2) potassiums<-c((round(min(d$potassium))*10):70)/10 ests<-matrix(NA,length(potassiums),3) for(i in 1:length(potassiums)){ ests[i,]<-summary(fit.ols, potassium=c(4, potassiums[i]))[6,c(4,6,7)] } plot(rep(potassiums,3),c(ests[,1],ests[,2],ests[,3]),xlab="Potassium", ylab="Effect Size", type="n") abline(h=0,lwd=1,col=gray(.8)) lines(potassiums,ests[,1]) lines(potassiums,ests[,2], lty=2) lines(potassiums,ests[,3], lty=2) #### Plot of the expected value of DBP as a function of age, holding all other covariates at their medians or modes, expect.ols<-Predict(fit.ols, age=ages) plot(rep(ages,2),c(expect.ols$lower,expect.ols$upper),type="n", xlab="Age", ylab="Expected DBP") lines(ages,expect.ols$yhat) lines(ages,expect.ols$lower, lty=2) lines(ages,expect.ols$upper, lty=2) #### Fitting a semiparametric cumulative probability model (CPM) using the orm function. This treats each value of the #### continuous variable as if it were a category and then analyzes the data using ordinal logistic regression. fit.orm<-orm(DBP ~ rcs(age,parms=4) + male + bmi + smoke + rcs(eGFR,parms=4) + rcs(potassium,parms=3) + sodium + DM + cd4, data=d, x=TRUE, y=TRUE) #### p-values from the CPM) anova(fit.orm) #### investigating another model that treats the association with potassium as linear fit.orm1<-orm(DBP ~ rcs(age,parms=4) + male + bmi + smoke + rcs(eGFR,parms=4) + potassium + sodium + DM, data=d, x=TRUE, y=TRUE) anova(fit.orm1) #### The AIC for fit.orm is slightly lower, but fit.orm1 is slightly easier to interpret. So I decided to go with #### fit.orm1. AIC(fit.orm) AIC(fit.orm1) #### Getting odds ratios from the CPM. summary(fit.orm1, age=c(40,20), bmi=c(20,25)) summary(fit.ols) #### This code extracts from the CPM the expectation of DBP as a function of age. Then it plots it. mean.fun<-Mean(fit.orm1) pred.means<-Predict(fit.orm1, age=ages, fun=function(x) mean.fun(x)) plot(c(ages,ages),c(pred.means$lower,pred.means$upper),type="n",xlab="Age",ylab="Expected DBP") lines(ages,pred.means$yhat,col=2) lines(ages,pred.means$lower,lty=2, col=2) lines(ages,pred.means$upper,lty=2,col=2) #### This code extracts from the CPM the median of DBP as a function of age. Then it plots it. quants.fun<-Quantile(fit.orm1) pred.medians<-Predict(fit.orm1, age=ages, fun=function(x) quants.fun(0.5, x)) plot(c(ages,ages),c(pred.medians$lower,pred.medians$upper),type="n",xlab="Age",ylab="Median DBP") lines(ages,pred.medians$yhat,col=3) lines(ages,pred.medians$lower,lty=2, col=3) lines(ages,pred.medians$upper,lty=2,col=3)