---
title: "R3 Microalbuminuria - Aim 1 - Manuscript 1"
author: "Heather L Prigmore, Bryan E Shepherd"
date: "`r date()`"
output:
 html_document:
    number_sections: true
    #code_folding: hide
    theme: lumen
    toc: true
    toc_float:
      collapsed: true
      smooth_scroll: true
---

```{r setup, include=FALSE}
# Global report options.
# NOTE: the workspace is deliberately NOT cleared with rm(list = ls()) here:
# a knitted report already starts from a clean environment, and wiping the
# workspace only destroys objects when chunks are run interactively.
options(prType="html", width=1800)
knitr::opts_chunk$set(echo = FALSE)

# Attach dependencies with library() so that a missing package stops the
# report immediately (require() would only return FALSE and let the failure
# surface much later). The load order is kept unchanged because it determines
# which package masks which function (e.g. plyr before dplyr).
suppressPackageStartupMessages(library(knitr))
suppressPackageStartupMessages(library(plyr))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(tgsify))
suppressPackageStartupMessages(library(Hmisc))
suppressPackageStartupMessages(library(rms))
suppressPackageStartupMessages(library(kableExtra))
suppressPackageStartupMessages(library(stringr))
suppressPackageStartupMessages(library(lubridate))
suppressPackageStartupMessages(library(here))
suppressPackageStartupMessages(library(tidyr))
suppressPackageStartupMessages(library(data.table))
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(memisc))
suppressPackageStartupMessages(library(car))
suppressPackageStartupMessages(library(stringi))
suppressPackageStartupMessages(library(pander))
suppressPackageStartupMessages(library(sjPlot))
suppressPackageStartupMessages(library(sjmisc))
suppressPackageStartupMessages(library(sjlabelled))
suppressPackageStartupMessages(library(boot))
suppressPackageStartupMessages(library(xtable))

```

```{r data}

# Read the prepared Aim 1 analysis data set.
df <- readRDS("../data/R3Aim1_data_20200710.rds")

# Base-10 log transforms of the three kidney outcomes (used by the box plots).
df$log_uacr <- log10(df$avg_uacr)
df$log_egfr <- log10(df$egfr)
df$log_sc <- log10(df$serum_creatinine)

# Choose the reference category of each releveled factor.
df$smoke.f <- relevel(df$smoke.f, ref = "No")
df$ethnicity.f <- relevel(df$ethnicity.f, ref = "Hausa/Fulani")
df$level_art_trt.f <- relevel(df$level_art_trt.f, ref = "First Line")

# Working data set: only records with a known APOL1 risk category.
df2 <- df[!is.na(df$risk_alleles.f), ]

```

# Project Summary

Optimal Management of HIV+ Adults at Risk for Kidney Disease in Nigeria (U01 DK112271).

**Specific Aim 1**: To determine the prevalence of APOL1 risk variants among 2,600 HIV+ individuals in Nigeria and assess whether APOL1 HR status is correlated with prevalent albuminuria, reduced eGFR, and/or prevalent CKD (defined as macroalbuminuria or eGFR < 60 ml/min/1.73m^2^) in a West African population.

**Hypothesis**: ~25% of those screened will carry the APOL1 HR genotype (defined as carriage of two risk alleles), which will be associated with albuminuria, lower baseline eGFR, higher uACR, and higher rates of prevalent CKD.

**Definitions**: Microalbuminuria is defined as having a mean uACR of 30-300 mg/g. CKD is defined as an eGFR of < 60 ml/min/1.73m^2^. APOL1 risk category is defined as 0-1 allele = Low Risk, 2 alleles = High Risk.

# Manuscript 1 Purpose

Compare the APOL1 risk allele genotypes (Low-Risk, High-Risk) to the phenotypes Microalbuminuria (average uACR), eGFR and serum creatinine.

This analysis is based on those who are not missing APOL1 risk category (`r nrow(df2)` records)

# Population Characteristics
```{r Table 1, warning=F}

# Table 1: every characteristic on the left of `~` is summarised within each
# APOL1 risk category (right of `~`), plus an overall column.
tbl1_formula <-
  age + sex.f + ethnicity.f +
  smoke.f + smoke_packs.f + smoke_duration +
  dm.f + htn.f + hf.f + others.f +
  duration_on_art + duration_on_art.f + level_art_trt.f +
  tenofovir.f + dolutegravir.f +
  cd4 + vl.f +
  current_meds.f + ace_arb.f +
  mean_bmi + mean_bmi.f +
  mean_sbp + mean_dbp + jnc_bp.f +
  avg_uacr + albuminuria.f +
  serum_creatinine + egfr +
  ckd_byegfr.f + ckd_byuacr.f + ckd_byboth.f ~
  risk_alleles.f

# Summary statistics with between-group tests; missing values are not shown
# as their own category.
tbl1 <- summaryM(
  tbl1_formula,
  data = df2,
  continuous = 4,
  overall = TRUE,
  na.include = FALSE,
  test = TRUE
)

# Flatten the summaryM object to a display table and post-process each column.
tbl1 <- summaryM_to_df(
  tbl1,
  long = TRUE,
  exclude1 = FALSE,
  what = "%",
  digits = 4,
  pctdig = 2,
  vnames = "labels"
) %>%
  sapply(slice_to_box)

colnames(tbl1)[1] <- "Variable"

# Render as a styled HTML table with an explanatory footnote.
tbl1 %>%
  kable(escape = FALSE, digits = 4) %>%
  trimws() %>%
  kable_styling(c("striped", "bordered")) %>%
  footnote(
    general_title = "",
    general = "Statistics presented: Median [IQR]; % (N)",
    alphabet_title = "Tests conducted:",
    alphabet = c(
      "Continuous variables: Wilcoxon",
      "Categorical variables: Pearson Chi-square"
    )
  )

```

# Box Plots
## Risk Allele (Low/High)
### Average uACR
```{r Box Plot uACR, warning=F}

# Average uACR (raw scale) by Low-/High-Risk category: box plot, then a table
# of summary() statistics per group with the non-missing count stored as "N"
# in position 8.
ggplot(df2, aes(x = risk_alleles.f, y = avg_uacr)) +
  geom_boxplot() +
  labs(x = "Risk Allele Category", y = "Average uACR")

df2 %>%
  #append_group(risk_alleles.f,All ~ Low-Risk + High-Risk) %>%
  split(.$risk_alleles.f) %>%
  lwith({
    smry <- summary(avg_uacr)
    smry[8] <- sum(!is.na(avg_uacr))
    names(smry)[8] <- "N"
    smry
  }) %>%
  do.call("rbind", .) %>%
  kable(digits = 2) %>%
  kable_styling(c("bordered", "striped"))

# Same display for the log10-transformed average uACR.
ggplot(df2, aes(x = risk_alleles.f, y = log_uacr)) +
  geom_boxplot() +
  labs(x = "Risk Allele Category", y = "Log10 Transformed Average uACR")

df2 %>%
  #append_group(risk_alleles.f,All ~ Low-Risk + High-Risk) %>%
  split(.$risk_alleles.f) %>%
  lwith({
    smry <- summary(log_uacr)
    smry[8] <- sum(!is.na(log_uacr))
    names(smry)[8] <- "N"
    smry
  }) %>%
  do.call("rbind", .) %>%
  kable(digits = 2) %>%
  kable_styling(c("bordered", "striped"))
```

### eGFR
```{r Box Plot eGFR}
# eGFR (raw scale) by Low-/High-Risk category: box plot, then a table of
# summary() statistics per group with the non-missing count stored as "N"
# in position 8.
ggplot(df2, aes(x = risk_alleles.f, y = egfr)) +
  geom_boxplot() +
  labs(x = "Risk Allele Category", y = "eGFR")

df2 %>%
  #append_group(risk_alleles.f,All ~ Low-Risk + High-Risk) %>%
  split(.$risk_alleles.f) %>%
  lwith({
    smry <- summary(egfr)
    smry[8] <- sum(!is.na(egfr))
    names(smry)[8] <- "N"
    smry
  }) %>%
  do.call("rbind", .) %>%
  kable(digits = 2) %>%
  kable_styling(c("bordered", "striped"))

# Same display for the log10-transformed eGFR.
ggplot(df2, aes(x = risk_alleles.f, y = log_egfr)) +
  geom_boxplot() +
  labs(x = "Risk Allele Category", y = "Log10 Transformed eGFR")

df2 %>%
  #append_group(risk_alleles.f,All ~ Low-Risk + High-Risk) %>%
  split(.$risk_alleles.f) %>%
  lwith({
    smry <- summary(log_egfr)
    smry[8] <- sum(!is.na(log_egfr))
    names(smry)[8] <- "N"
    smry
  }) %>%
  do.call("rbind", .) %>%
  kable(digits = 2) %>%
  kable_styling(c("bordered", "striped"))
```

### Serum Creatinine
```{r Box Plot Serum Creatinine}
# Serum creatinine (raw scale) by Low-/High-Risk category: box plot, then a
# table of summary() statistics per group with the non-missing count stored
# as "N" in position 8.
ggplot(df2, aes(x = risk_alleles.f, y = serum_creatinine)) +
  geom_boxplot() +
  labs(x = "Risk Allele Category", y = "Serum Creatinine")

df2 %>%
  #append_group(risk_alleles.f,All ~ Low-Risk + High-Risk) %>%
  split(.$risk_alleles.f) %>%
  lwith({
    smry <- summary(serum_creatinine)
    smry[8] <- sum(!is.na(serum_creatinine))
    names(smry)[8] <- "N"
    smry
  }) %>%
  do.call("rbind", .) %>%
  kable(digits = 2) %>%
  kable_styling(c("bordered", "striped"))

# Same display for the log10-transformed serum creatinine.
ggplot(df2, aes(x = risk_alleles.f, y = log_sc)) +
  geom_boxplot() +
  labs(x = "Risk Allele Category", y = "Log10 Transformed Serum Creatinine")

df2 %>%
  #append_group(risk_alleles.f,All ~ Low-Risk + High-Risk) %>%
  split(.$risk_alleles.f) %>%
  lwith({
    smry <- summary(log_sc)
    smry[8] <- sum(!is.na(log_sc))
    names(smry)[8] <- "N"
    smry
  }) %>%
  do.call("rbind", .) %>%
  kable(digits = 2) %>%
  kable_styling(c("bordered", "striped"))
```

## Risk Allele (No alleles, 1 allele, 2 alleles)
### Average uACR
```{r Box Plot uACR2, warning=F}

# Average uACR (raw scale) by the three-level allele grouping
# (risk_alleles_3.f): box plot, then a table of summary() statistics per group
# with the non-missing count stored as "N" in position 8.
ggplot(df2, aes(x = risk_alleles_3.f, y = avg_uacr)) +
  geom_boxplot() +
  labs(x = "Risk Allele Category", y = "Average uACR")

df2 %>%
  #append_group(risk_alleles_3.f,All ~ Low-Risk + High-Risk) %>%
  split(.$risk_alleles_3.f) %>%
  lwith({
    smry <- summary(avg_uacr)
    smry[8] <- sum(!is.na(avg_uacr))
    names(smry)[8] <- "N"
    smry
  }) %>%
  do.call("rbind", .) %>%
  kable(digits = 2) %>%
  kable_styling(c("bordered", "striped"))

# Same display for the log10-transformed average uACR.
ggplot(df2, aes(x = risk_alleles_3.f, y = log_uacr)) +
  geom_boxplot() +
  labs(x = "Risk Allele Category", y = "Log10 Transformed Average uACR")

df2 %>%
  #append_group(risk_alleles_3.f,All ~ Low-Risk + High-Risk) %>%
  split(.$risk_alleles_3.f) %>%
  lwith({
    smry <- summary(log_uacr)
    smry[8] <- sum(!is.na(log_uacr))
    names(smry)[8] <- "N"
    smry
  }) %>%
  do.call("rbind", .) %>%
  kable(digits = 2) %>%
  kable_styling(c("bordered", "striped"))
```

### eGFR
```{r Box Plot eGFR2}
# eGFR (raw scale) by the three-level allele grouping (risk_alleles_3.f):
# box plot, then a table of summary() statistics per group with the
# non-missing count stored as "N" in position 8.
ggplot(df2, aes(x = risk_alleles_3.f, y = egfr)) +
  geom_boxplot() +
  labs(x = "Risk Allele Category", y = "eGFR")

df2 %>%
  #append_group(risk_alleles_3.f,All ~ Low-Risk + High-Risk) %>%
  split(.$risk_alleles_3.f) %>%
  lwith({
    smry <- summary(egfr)
    smry[8] <- sum(!is.na(egfr))
    names(smry)[8] <- "N"
    smry
  }) %>%
  do.call("rbind", .) %>%
  kable(digits = 2) %>%
  kable_styling(c("bordered", "striped"))

# Same display for the log10-transformed eGFR.
ggplot(df2, aes(x = risk_alleles_3.f, y = log_egfr)) +
  geom_boxplot() +
  labs(x = "Risk Allele Category", y = "Log10 Transformed eGFR")

df2 %>%
  #append_group(risk_alleles_3.f,All ~ Low-Risk + High-Risk) %>%
  split(.$risk_alleles_3.f) %>%
  lwith({
    smry <- summary(log_egfr)
    smry[8] <- sum(!is.na(log_egfr))
    names(smry)[8] <- "N"
    smry
  }) %>%
  do.call("rbind", .) %>%
  kable(digits = 2) %>%
  kable_styling(c("bordered", "striped"))
```

### Serum Creatinine
```{r Box Plot Serum Creatinine2}
# Serum creatinine (raw scale) by the three-level allele grouping
# (risk_alleles_3.f): box plot, then a table of summary() statistics per group
# with the non-missing count stored as "N" in position 8.
ggplot(df2, aes(x = risk_alleles_3.f, y = serum_creatinine)) +
  geom_boxplot() +
  labs(x = "Risk Allele Category", y = "Serum Creatinine")

df2 %>%
  #append_group(risk_alleles_3.f,All ~ Low-Risk + High-Risk) %>%
  split(.$risk_alleles_3.f) %>%
  lwith({
    smry <- summary(serum_creatinine)
    smry[8] <- sum(!is.na(serum_creatinine))
    names(smry)[8] <- "N"
    smry
  }) %>%
  do.call("rbind", .) %>%
  kable(digits = 2) %>%
  kable_styling(c("bordered", "striped"))

# Same display for the log10-transformed serum creatinine.
ggplot(df2, aes(x = risk_alleles_3.f, y = log_sc)) +
  geom_boxplot() +
  labs(x = "Risk Allele Category", y = "Log10 Transformed Serum Creatinine")

df2 %>%
  #append_group(risk_alleles_3.f,All ~ Low-Risk + High-Risk) %>%
  split(.$risk_alleles_3.f) %>%
  lwith({
    smry <- summary(log_sc)
    smry[8] <- sum(!is.na(log_sc))
    names(smry)[8] <- "N"
    smry
  }) %>%
  do.call("rbind", .) %>%
  kable(digits = 2) %>%
  kable_styling(c("bordered", "striped"))
```

# Primary Analyses

```{r CPM setup, include=FALSE}

#### function to estimate conditional mean and its standard error for orm model
#
# Arguments
#   mod      fitted model object; the function reads mod$yunique, the
#            coefficients (intercepts first), mod$trans$cumprob, mod$family
#            and, when se=TRUE, mod$info.matrix.
#   new.data matrix / data frame of design columns, one row per covariate
#            pattern; its column names select the slopes from coef(mod).
#   se       if TRUE, also return a delta-method standard error and a
#            normal-approximation 95% interval.
# Value
#   se=TRUE : matrix with columns "est", "se", "lb", "ub"
#   se=FALSE: one-column matrix "est"
mean.orm <- function(mod, new.data, se=TRUE){
  if (is.null(mod$yunique)) stop("Need to set x=TRUE and y=TRUE for orm")

  y.levels <- mod$yunique
  n.int <- length(y.levels) - 1

  # linear predictor of every row of new.data at every intercept
  lin.pred <- as.matrix(new.data) %*% matrix(coef(mod)[colnames(new.data)])
  intercepts <- mod$coef[1:n.int]
  eta <- t(outer(intercepts, lin.pred, "+")[,,1])

  # cumprob(eta) -> differences between neighbours give the probability mass
  # attached to each distinct outcome value
  exceed <- mod$trans$cumprob(eta)
  pmf <- t(apply(exceed, 1, FUN=function(p) c(1, p[1:n.int]) - c(p[1:n.int], 0)))
  est <- apply(pmf, 1, FUN=function(p) sum(p * y.levels))

  if (!se) {
    out <- matrix(est)
    colnames(out) <- c("est")
    return(out)
  }

  # the logistic family gets an explicit density as derivative of cumprob
  if (mod$family == "logistic") mod$trans$deriv <- function(x) exp(-x)/(1+exp(-x))^2

  # gradient of the mean with respect to (intercepts, slopes)
  gaps <- y.levels[2:length(y.levels)] - y.levels[1:n.int]
  d.alpha <- t(apply(mod$trans$deriv(eta), 1, FUN=function(d) d * gaps))
  d.beta <- apply(d.alpha, 1, sum) * as.matrix(new.data)
  grad <- cbind(d.alpha, d.beta)

  # delta method: grad %*% (information)^-1 %*% t(grad)
  est.se <- sqrt(diag(grad %*% solve(mod$info.matrix) %*% t(grad)))

  z <- qnorm(0.975)
  out <- cbind(est, est.se)
  out <- cbind(out, out[, 1] - z * out[, 2], out[, 1] + z * out[, 2])
  colnames(out) <- c("est", "se", "lb", "ub")
  out
}


#### function to estimate conditional quantiles and confidence intervals for orm model
#
# Arguments
#   mod      fitted model object; the function reads mod$yunique, the
#            coefficients (intercepts first), mod$trans$cumprob and, when
#            se=TRUE, mod$info.matrix.
#   new.data matrix / data frame of design columns, one row per covariate
#            pattern; its column names select the slopes from coef(mod).
#            The covariance sub-matrix is picked in coefficient order, so the
#            columns of new.data are expected to be in that same order.
#   probs    probabilities of the quantiles to estimate.
#   se       if TRUE, also return confidence limits.
# Value
#   se=FALSE: matrix, nrow(new.data) x length(probs), of quantile estimates.
#   se=TRUE : list(quantile=, lb=, ub=) of matrices of that same shape.
#
# The estimated CDF (and each of its confidence bands) is inverted by linear
# interpolation between neighbouring distinct outcome values. That inversion
# used to be written out three times; it now lives in one local helper. The
# unused derivative computations were removed. Returned values are unchanged.
quantile.orm <- function(mod, new.data, probs=0.5, se=TRUE){

  n.obs <- dim(new.data)[1]
  row.id <- seq_len(n.obs)
  order.y <- mod$yunique
  n.y <- length(order.y)

  xb <- as.matrix(new.data)%*%matrix(coef(mod)[colnames(new.data)])
  alpha <- mod$coef[1:(length(unique(order.y))-1)]
  lb <- t(outer(alpha, xb, "+")[,,1])

  # CDF at each distinct outcome value, padded with 0 and 1 so that a
  # bracketing pair of columns always exists for any probability in [0, 1]
  m.cdf <- cbind(0, 1 - mod$trans$cumprob(lb), 1)

  # Invert one padded CDF matrix (rows = covariate patterns) at probability p.
  invert.cdf <- function(cdf, p) {
    # last column with cdf <= p and first column with cdf >= p
    index.1 <- apply(cdf, 1, FUN=function(x) max(which(x <= p))[1])
    index.2 <- apply(cdf, 1, FUN=function(x) min(which(x >= p))[1])

    # a column beyond the last outcome value is flagged as Inf
    y1 <- ifelse(index.1 > n.y, Inf, order.y[index.1])
    y2 <- ifelse(index.2 > n.y, Inf, order.y[index.2])

    y1.cdf <- ifelse(index.1 == 0, 0, cdf[cbind(row.id, index.1)])
    y2.cdf <- ifelse(index.2 > n.y, 1, cdf[cbind(row.id, index.2)])

    # exact hit -> that value; otherwise interpolate linearly
    q <- ifelse(index.1 == index.2, y1,
                (y2 - y1)/(y2.cdf - y1.cdf)*(p - y1.cdf) + y1)
    # quantiles that run off the top are capped at the largest outcome value
    ifelse(is.infinite(q), max(order.y), q)
  }

  q.est <- matrix(NA, nrow=n.obs, ncol=length(probs))
  for (i in seq_along(probs)) {
    # best effort, as before: a failing probability leaves its column NA
    try(q.est[, i] <- invert.cdf(m.cdf, probs[i]))
  }
  result <- q.est

  if (se) {
    info.inv <- as.matrix(solve(mod$info.matrix))
    beta.idx <- which(names(coef(mod)) %in% colnames(new.data))
    dlb.dtheta <- as.matrix(cbind(1, new.data))

    # standard error of the linear predictor at every intercept
    lb.se <- matrix(NA, ncol=dim(lb)[2], nrow=n.obs)
    for (j in seq_len(dim(lb)[2])) {
      keep <- c(j, beta.idx)
      lb.se[, j] <- sqrt(diag(dlb.dtheta %*% info.inv[keep, keep] %*% t(dlb.dtheta)))
    }

    # 95% band built on the linear-predictor scale, then mapped to the CDF
    z <- qnorm(0.975)
    cdf.lo <- cbind(0, matrix(1 - mod$trans$cumprob(lb + z*lb.se), nrow=n.obs), 1)
    cdf.hi <- cbind(0, matrix(1 - mod$trans$cumprob(lb - z*lb.se), nrow=n.obs), 1)

    q.from.lo <- q.from.hi <- matrix(NA, nrow=n.obs, ncol=length(probs))
    for (i in seq_along(probs)) {
      try({
        q.from.lo[, i] <- invert.cdf(cdf.lo, probs[i])
        q.from.hi[, i] <- invert.cdf(cdf.hi, probs[i])
      })
    }

    # A lower CDF reaches p at a larger outcome value, so the lower CDF band
    # yields the UPPER quantile limit and vice versa.
    result <- list(quantile=q.est,
                   lb=q.from.hi,
                   ub=q.from.lo)
  }

  return(result)
}


#### function to estimate conditional CDF and its standard error for orm models
#
# Arguments
#   mod      fitted model object; the function reads mod$yunique, the
#            coefficients, mod$trans$cumprob, mod$family and, when se=TRUE,
#            mod$trans$deriv and mod$info.matrix.
#   new.data matrix / data frame of design columns, one row per covariate
#            pattern; its column names select the slopes from coef(mod).
#   at.y     outcome value(s) at which the CDF is evaluated.
#   se       if TRUE, also return standard errors and 95% limits.
# Value
#   list(est=) when se=FALSE; list(est=, se=, lb=, ub=) when se=TRUE, where
#   est = 1 - cumprob(intercept + xb), se is a delta-method standard error of
#   est, and lb/ub come from a +/- qnorm(0.975) band on the linear predictor
#   that is then passed through 1 - cumprob().
#
# NOTE(review): for at.y strictly between the smallest and largest outcome
# value the intercept used is the one just before the first outcome value
# >= at.y, so an at.y equal to an observed value appears to give P(Y < at.y),
# whereas at.y == min gives the first intercept. Confirm this is intended.
# NOTE(review): the +/-Inf sentinels below are also used to subscript
# mod$coef and, with se=TRUE, the covariance matrix; behaviour for at.y
# outside the observed range should be confirmed before relying on it.
cdf.orm <- function(mod, new.data, at.y=0,se=TRUE){
  if(is.null(mod$yunique)) {
    stop("Need to set x=TRUE and y=TRUE for orm")
  } else{
    order.y <- mod$yunique
    xb <- as.matrix(new.data)%*%matrix(coef(mod)[colnames(new.data)])

    # Map each at.y to an intercept position:
    #   below the smallest outcome value -> Inf
    #   equal to the smallest            -> 1
    #   at or above the largest          -> -Inf
    #   otherwise                        -> (first position with order.y >= at.y) - 1
    index <- sapply(at.y, FUN=function(x) {if(x<min(order.y)[1]) result <- Inf
    else if (x==min(order.y)[1]) result <- 1
    else if(x >= max(order.y)[1]) result <- -Inf
    else which(order.y>=x)[1]-1})

    m.alpha <- mod$coef[index]
    # infinite positions carry the sentinel itself as the "intercept"
    m.alpha <- ifelse(is.infinite(index), index, m.alpha)
    # rows = covariate patterns, columns = at.y values
    if(length(at.y)==1){
      lb <- as.matrix(outer(m.alpha, xb, "+")[,,1])
    } else lb <- t(outer(m.alpha, xb, "+")[,,1])
    m.cdf <- 1- mod$trans$cumprob(lb)


    if(se){
      # the logistic family gets an explicit density as derivative of cumprob
      if(mod$family=="logistic") mod$trans$deriv <- function(x) exp(-x)/(1+exp(-x))^2
      cdf.se <- matrix(NA, ncol=length(at.y), nrow=dim(new.data)[1])
      lb.se <- matrix(NA, ncol=length(at.y), nrow=dim(new.data)[1])

      var <- as.matrix(solve(mod$info.matrix))

      for(i in 1:length(at.y)) {

        # covariance of (the selected intercept, the slopes named in new.data)
        var.i <- var[c(index[i], which(names(coef(mod)) %in% colnames(new.data))),
                     c(index[i], which(names(coef(mod)) %in% colnames(new.data)))]
        # gradients of the CDF and of the linear predictor w.r.t. those parameters
        dcdf.dtheta <- cbind(-mod$trans$deriv(lb[,i]),
                             -mod$trans$deriv(lb[,i])*as.matrix(new.data) )
        dlb.dtheta <- as.matrix(cbind(1, new.data))
        cdf.se[,i] <- sqrt(diag(dcdf.dtheta %*% var.i%*% t(dcdf.dtheta)))
        lb.se[, i] <- sqrt(diag(dlb.dtheta%*%var.i%*% t(dlb.dtheta)))

      }
      # limits are formed on the linear-predictor scale; adding to the linear
      # predictor lowers 1 - cumprob(), hence "+" gives the lower limit
      ci.lb <- sapply(1:length(at.y), FUN=function(i) { 1- mod$trans$cumprob(lb[, i] +qnorm(0.975)*lb.se[, i])})
      ci.ub <- sapply(1:length(at.y), FUN=function(i) { 1- mod$trans$cumprob(lb[, i] -qnorm(0.975)*lb.se[, i])})


      result <- list(est=m.cdf,
                     se=cdf.se,
                     lb=ci.lb,
                     ub=ci.ub)
    } else{
      result <- list(est=m.cdf)
    }


    return(result)

  }

}

```

```{r}
# Build the odds-ratio table shown for a fitted model.
#
# Argument
#   mod  fitted model object; the function calls summary(mod, ...) and
#        anova(mod) and reads mod$Design$name and mod$Design$label.
# Value
#   data frame with columns Variable, Level, Low, High, OR, 95% CI, P-value.
#   The last expression is an assignment, so the value is returned invisibly;
#   callers must assign it (e.g. `m1r <- model.est.orm(m1)`).
#
# NOTE(review): the function is tied to the exact covariate set of the models
# in this report: the contrasts passed to summary() and the hard-coded `level`
# vector (21 entries) must line up, in number and order, with the rows that
# survive the filter below.
model.est.orm <- function(mod) {  
  
                  # Effects for fixed contrasts: age 30 -> 40, duration on ART 1 -> 2,
                  # cd4_sqrt sqrt(200) -> sqrt(500), BMI 20 -> 25; categorical
                  # variables are compared against the reference levels named here.
                  # NOTE(review): vl.f is given as "<= 200" (with a space) while the
                  # footnotes in this report print "<=200" -- confirm the factor level.
                  sum <- data.frame(summary(mod,age=c(30,40),duration_on_art=c(1,2),cd4_sqrt=c(sqrt(200),sqrt(500)),mean_bmi=c(20,25),risk_alleles.f="Low-Risk",sex.f="Male",ethnicity.f="Hausa/Fulani",tenofovir.f="No",level_art_trt.f="First Line",vl.f="<= 200",dm.f="No",htn.f="No",hf.f="No",others.f="No",ace_arb.f="No",smoke.f="No",jnc_bp.f="Normal"))
                  # Effect and confidence limits as fixed two-decimal strings
                  sum$or <- format(round(sum$Effect,2),nsmall=2,scientific=FALSE,trim=TRUE)
                  sum$lb <- format(round(sum$Lower.0.95,2),nsmall=2,scientific=FALSE,trim=TRUE)
                  #sum$ub <- format(round(sum$Upper.0.95,2),nsmall=2,scientific=FALSE,trim=TRUE)
                  # upper limits of 100 or more are displayed as 100.00
                  sum$ub <- format(round(ifelse(sum$Upper.0.95>=100.00,100.00,sum$Upper.0.95),2),nsmall=2,scientific=FALSE,trim=TRUE)
                  sum$ci <- paste0("(",sum$lb,", ",sum$ub,")")
      
                  # Recover the variable name from the row names: rows whose name starts
                  # with "X" get NA and inherit the name of the row above; other rows
                  # keep only the leading part of their row name.
                  sum <- data.frame(var = ifelse(startsWith(rownames(sum), "X"),NA,sub("^([^.]*.[^.]*).*", "\\1", rownames(sum))),sum)
                  sum <- fill(sum,"var",.direction="down")
                  # Keep model variables only, and only rows with Type == 2
                  # (presumably the exponentiated / odds-ratio rows -- confirm against
                  # the summary documentation of the fitting package).
                  sum <- sum[sum$var %in% mod$Design$name & sum$Type==2,]
                  rownames(sum) <- c()
                  
                  # Comparison level shown for each remaining row; NA marks the four
                  # leading rows (continuous variables), which show Low/High instead.
                  level <- c(NA,NA,NA,NA,"High-Risk","Female","Igbo","Yoruba","Other","Yes","Second Line",">200","Yes","Yes","Yes","Yes","Yes","Yes","Pre-hypertension","Stage 1 Hypertension","Stage 2 Hypertension")
                
                  sum <- data.frame(sum, level)
                  # Low/High are blanked for rows that have a level label
                  sum$Low <- ifelse(is.na(sum$level),round(sum$Low,2),NA)
                  sum$High <- ifelse(is.na(sum$level),round(sum$High,2),NA)
                  sum <- sum[,c("var","level","Low","High","Diff.","or","ci")]
                  
                  
                  # One p-value per model variable from anova(mod), four decimals,
                  # with values that round to 0.0000 shown as "< 0.0001"
                  anova <- data.frame(anova(mod))
                  anova$pval <- format(round(anova$P,4),nsmall=4,scientific=FALSE)
                  if(any(anova$pval=='0.0000')) {anova$pval[anova$pval=='0.0000']<-'< 0.0001'}
                  anova$var <- rownames(anova)
                  anova <- anova[rownames(anova) %in% mod$Design$name,c("pval","var")]
                  
                  # Display labels of the model variables
                  labels <- data.frame(var = mod$Design$name, Variable = mod$Design$label)
                  
                  # Merge p-values, effects and labels by variable name; the p-value is
                  # printed only on the first row of a multi-row variable.
                  Table <- data.frame(join(anova,sum,by="var", type="inner"))
                  Table$pval[duplicated(Table$var)] <- NA
                  Table <- data.frame(join(Table, labels,by="var", type= "inner"))
                  
                  # Rename to display headers
                  Table <- setnames(Table, old=c("pval","var","level","Low","High","Diff.","or","ci","Variable"), 
new=c("P-value","var","Level","Low","High","Diff.","OR","95% CI","Variable"))
                  
                  # Final column order; this assignment is also the (invisible) return value
                  Table <- Table[,c("Variable","Level","Low","High","OR","95% CI","P-value")]
                  #Table$Level <- ifelse(is.na(Table$Level),Table$`Diff.`,Table$Level)
                  
}
               
               
```

## Associations
Risk Alleles as Categorical (Low-Risk, High-Risk)

### Average uACR
```{r CPM uACR, include = FALSE}

# Analysis set for the uACR model: risk category and average uACR both known.
df2 <- df[!(is.na(df$risk_alleles.f) | is.na(df$avg_uacr)), ]

# data distribution
dd <- datadist(df2)
options(datadist = "dd")
set.seed(20200407)

# Multiple imputation (20 imputations) of the model variables.
a1 <- aregImpute(
  ~ avg_uacr + risk_alleles.f + age + sex.f + ethnicity.f + duration_on_art +
    tenofovir.f + level_art_trt.f + cd4_sqrt + vl.f + dm.f + htn.f + hf.f +
    others.f + ace_arb.f + smoke.f + mean_bmi + jnc_bp.f,
  data = df2, nk = 3, n.impute = 20
)

# Model for average uACR fitted with orm over the imputed data sets.
m1 <- fit.mult.impute(
  avg_uacr ~ risk_alleles.f + rcs(age,3) + sex.f + ethnicity.f +
    rcs(duration_on_art,3) + tenofovir.f + level_art_trt.f + rcs(cd4_sqrt,3) +
    vl.f + dm.f + htn.f + hf.f + others.f + ace_arb.f + smoke.f +
    rcs(mean_bmi,3) + jnc_bp.f,
  fitter = orm, xtrans = a1, data = df2
)

m1r <- model.est.orm(m1) #[c(9,2:6,8)]

## Multivariate risk alleles category - all other kept constant (m1) [manual creation]
# median: age (40), duration (9), cd4 (21.88607), mean_bmi (23.04573)
# mode: Female (1253), Hausa/Fulani (1383), Tenofovir-yes (1047), Level-first line (1563), vl <=200 (1535), dm-no (1856), htn-no (1620), hf-no (1892), others-no (1479), ace_arb-no (1805), smoke-no (1798), jnc_bp-normal (1207)
# One design row per risk category; the rows differ only in the risk indicator.
new.data <- matrix(
  NA,
  nrow = length(levels(df2$risk_alleles.f)),
  ncol = length(m1$Design$colnames),
  dimnames = list(NULL, m1$Design$colnames)
)
new.data[,"risk_alleles.f=High-Risk"] <- c(0,1)

# Spline terms (linear + nonlinear column) evaluated at the sample median,
# written to every row at once.
new.data[, 2:3] <- rcspline.eval(
  rep(median(df2$age, na.rm=TRUE), nrow(new.data)),
  knots = m1$Design$parms$age, inclx = TRUE
)
new.data[, 8:9] <- rcspline.eval(
  rep(median(df2$duration_on_art, na.rm=TRUE), nrow(new.data)),
  knots = m1$Design$parms$duration_on_art, inclx = TRUE
)
new.data[, 12:13] <- rcspline.eval(
  rep(median(df2$cd4_sqrt, na.rm=TRUE), nrow(new.data)),
  knots = m1$Design$parms$cd4_sqrt, inclx = TRUE
)
new.data[, 21:22] <- rcspline.eval(
  rep(median(df2$mean_bmi, na.rm=TRUE), nrow(new.data)),
  knots = m1$Design$parms$mean_bmi, inclx = TRUE
)

# Indicator columns switched on (modal category is not the reference level) ...
new.data[, c("sex.f=Female", "tenofovir.f=Yes")] <- 1
# ... and switched off (modal category is the reference level).
new.data[, c(
  "ethnicity.f=Igbo", "ethnicity.f=Yoruba", "ethnicity.f=Other",
  "level_art_trt.f=Second Line",
  "vl.f=>200",
  "dm.f=Yes", "htn.f=Yes", "hf.f=Yes", "others.f=Yes",
  "ace_arb.f=Yes", "smoke.f=Yes",
  "jnc_bp.f=Pre-hypertension",
  "jnc_bp.f=Stage 1 Hypertension",
  "jnc_bp.f=Stage 2 Hypertension"
)] <- 0

# estimates
uacr.est.mean <- data.frame(
  risk = c("Low-Risk","High-Risk"),
  data.frame(mean.orm(m1, new.data, se=TRUE))
)

uacr.est.q <- data.frame(
  risk = c("Low-Risk","High-Risk"),
  data.frame(quantile.orm(m1, new.data, probs=0.5, se=TRUE))
)

# CDF at 30 plus its complement; the complement's limits swap roles.
uacr.est.cdf1 <- data.frame(cdf.orm(m1, new.data, at.y=30, se=TRUE))
uacr.est.cdf1 <- data.frame(
  risk = c("Low-Risk","High-Risk"),
  uacr.est.cdf1,
  est2 = 1-uacr.est.cdf1$est,
  se2 = uacr.est.cdf1$se,
  lb2 = 1-uacr.est.cdf1$ub,
  ub2 = 1-uacr.est.cdf1$lb
)

# CDF at 300 plus its complement.
uacr.est.cdf2 <- data.frame(cdf.orm(m1, new.data, at.y=300, se=TRUE))
uacr.est.cdf2 <- data.frame(
  risk = c("Low-Risk","High-Risk"),
  uacr.est.cdf2,
  est2 = 1-uacr.est.cdf2$est,
  se2 = uacr.est.cdf2$se,
  lb2 = 1-uacr.est.cdf2$ub,
  ub2 = 1-uacr.est.cdf2$lb
)

```

```{r}
# Model fit statistics (coefficients suppressed), then the odds-ratio table.
print(m1, coef = FALSE, digits = 2)

m1r %>%
  kable(escape = FALSE, digits = 4) %>%
  trimws() %>%
  kable_styling(c("striped", "bordered")) %>%
  footnote(
    general_title = "Reference Levels:",
    general = "Risk Allele Category (Low-Risk), Sex (Male), Ethnicity (Hausa/Fulani), Taking Tenofovir? (No), Level of ART Treatment (First Line), Recent Viral Load Count (<=200), Have diabetes? (No), Have hypertension? (No), Have heart failure? (No), Have other comorbid conditions? (No), Taking ACE/ARB? (No), Do you smoke cigarettes? (No), JNC BP Classification (Normal)"
  )
```

**P-values are from Likelihood Ratio Test. Tests significance of entire variable (which is why you see only 1 p-value for ethnicity and JNC Blood Pressure).**

Interpretation: 
  
* Continuous variables (Low/High columns in the table are the two "groups" we are comparing): Age - For those who are 40 years old, the odds of having a higher uACR value are multiplied by 1.08 compared to those who are 30. CD4 Sqrt (square low/high to revert back to CD4 increase) - Those with a CD4 count of 500 (22.36^2) have 0.88 times the odds of having a higher uACR value than those who have a CD4 count of 200 (14.14^2).

* Categorical variables: Risk Allele Category - The odds of someone with High Risk Allele status having higher levels of uACR is 2.16 times the odds of those with low risk allele status having higher uACR values. 

```{r}
# Estimated mean uACR per risk category.
uacr.est.mean %>%
  kable(escape = FALSE, caption = "uACR Est Mean") %>%
  trimws() %>%
  kable_styling(c("striped", "bordered")) %>%
  footnote(general_title = "", general = "All other variables adjusted at median or mode")
```

Interpretation: 

* The expected average uACR value for those in the Low-Risk Allele category is 64.76.

```{r}
# Estimated median uACR per risk category with its limits.
uacr.est.q %>%
  kable(escape = FALSE, caption = "uACR Est Quantile (Median)") %>%
  trimws() %>%
  kable_styling(c("striped", "bordered")) %>%
  footnote(general_title = "", general = "All other variables adjusted at median or mode")
```

Interpretation: 

* The expected median uACR value for those in the Low-Risk Allele category is 21.99.

```{r}
# Estimated CDF at 30 per risk category; columns 6-9 (the complement) are bolded.
uacr.est.cdf1 %>%
  kable(escape = FALSE, caption = "uACR Est CDF (=30)") %>%
  trimws() %>%
  kable_styling(c("striped", "bordered")) %>%
  footnote(general_title = "", general = "All other variables adjusted at median or mode") %>%
  column_spec(6:9, bold = TRUE)
```

Interpretation: 
  
* Unbolded - The probability that the value of uACR is < 30 or normal for Low-Risk allele category is 60.01% (95% CI [55.31%, 64.53%])
* Bolded - The probability that the value of uACR is >30 or have non-normal values (micro or macro) for Low-Risk allele category is 39.99% (95% CI [35.47%,44.69%])

```{r}
# Estimated CDF at 300 per risk category; columns 6-9 (the complement) are bolded.
uacr.est.cdf2 %>%
  kable(escape = FALSE, caption = "uACR Est CDF (=300)") %>%
  trimws() %>%
  kable_styling(c("striped", "bordered")) %>%
  footnote(general_title = "", general = "All other variables adjusted at median or mode") %>%
  column_spec(6:9, bold = TRUE)
```

Interpretation: 

* Unbolded - The probability that the value of uACR is < 300 for Low-Risk allele category is 97.29% (95% CI [96.38%, 97.97%])
* Bolded - The probability that the value of uACR is >300 or have macro-albuminuria for Low-Risk allele category is 2.71% (95% CI [2.03%,3.62%])

### eGFR
```{r CPM eGFR, include=FALSE}

# Analysis set for the eGFR model: risk category and eGFR both known.
df2 <- df[!(is.na(df$risk_alleles.f) | is.na(df$egfr)), ]

# data distribution
dd <- datadist(df2)
options(datadist = "dd")
set.seed(20200407)

# Multiple imputation (20 imputations) of the model variables.
a2 <- aregImpute(
  ~ egfr + risk_alleles.f + age + sex.f + ethnicity.f + duration_on_art +
    tenofovir.f + level_art_trt.f + cd4_sqrt + vl.f + dm.f + htn.f + hf.f +
    others.f + ace_arb.f + smoke.f + mean_bmi + jnc_bp.f,
  data = df2, nk = 3, n.impute = 20
)

# Model for eGFR fitted with orm over the imputed data sets.
m2 <- fit.mult.impute(
  egfr ~ risk_alleles.f + rcs(age,3) + sex.f + ethnicity.f +
    rcs(duration_on_art,3) + tenofovir.f + level_art_trt.f + rcs(cd4_sqrt,3) +
    vl.f + dm.f + htn.f + hf.f + others.f + ace_arb.f + smoke.f +
    rcs(mean_bmi,3) + jnc_bp.f,
  fitter = orm, xtrans = a2, data = df2
)

m2r <- model.est.orm(m2) #[c(9,2:6,8)]

## Multivariate risk alleles category - all other kept constant (m1) [manual creation]
# median: age (40), duration (9), cd4 (21.88607), mean_bmi (23.04573)
# mode: Female (1253), Hausa/Fulani (1383), Tenofovir-yes (1047), Level-first line (1563), vl <=200 (1535), dm-no (1856), htn-no (1620), hf-no (1892), others-no (1479), ace_arb-no (1805), smoke-no (1798), jnc_bp-normal (1207)
# One design row per risk category; the rows differ only in the risk indicator.
new.data <- matrix(
  NA,
  nrow = length(levels(df2$risk_alleles.f)),
  ncol = length(m2$Design$colnames),
  dimnames = list(NULL, m2$Design$colnames)
)
new.data[,"risk_alleles.f=High-Risk"] <- c(0,1)

# Spline terms (linear + nonlinear column) evaluated at the sample median,
# written to every row at once.
new.data[, 2:3] <- rcspline.eval(
  rep(median(df2$age, na.rm=TRUE), nrow(new.data)),
  knots = m2$Design$parms$age, inclx = TRUE
)
new.data[, 8:9] <- rcspline.eval(
  rep(median(df2$duration_on_art, na.rm=TRUE), nrow(new.data)),
  knots = m2$Design$parms$duration_on_art, inclx = TRUE
)
new.data[, 12:13] <- rcspline.eval(
  rep(median(df2$cd4_sqrt, na.rm=TRUE), nrow(new.data)),
  knots = m2$Design$parms$cd4_sqrt, inclx = TRUE
)
new.data[, 21:22] <- rcspline.eval(
  rep(median(df2$mean_bmi, na.rm=TRUE), nrow(new.data)),
  knots = m2$Design$parms$mean_bmi, inclx = TRUE
)

# Indicator columns switched on (modal category is not the reference level) ...
new.data[, c("sex.f=Female", "tenofovir.f=Yes")] <- 1
# ... and switched off (modal category is the reference level).
new.data[, c(
  "ethnicity.f=Igbo", "ethnicity.f=Yoruba", "ethnicity.f=Other",
  "level_art_trt.f=Second Line",
  "vl.f=>200",
  "dm.f=Yes", "htn.f=Yes", "hf.f=Yes", "others.f=Yes",
  "ace_arb.f=Yes", "smoke.f=Yes",
  "jnc_bp.f=Pre-hypertension",
  "jnc_bp.f=Stage 1 Hypertension",
  "jnc_bp.f=Stage 2 Hypertension"
)] <- 0

# estimates
egfr.est.mean <- data.frame(
  risk = c("Low-Risk","High-Risk"),
  data.frame(mean.orm(m2, new.data, se=TRUE))
)

egfr.est.q <- data.frame(
  risk = c("Low-Risk","High-Risk"),
  data.frame(quantile.orm(m2, new.data, probs=0.5, se=TRUE))
)

# CDF at 60 plus its complement; the complement's limits swap roles.
egfr.est.cdf <- data.frame(cdf.orm(m2, new.data, at.y=60, se=TRUE))
egfr.est.cdf <- data.frame(
  risk = c("Low-Risk","High-Risk"),
  egfr.est.cdf,
  est2 = 1-egfr.est.cdf$est,
  se2 = egfr.est.cdf$se,
  lb2 = 1-egfr.est.cdf$ub,
  ub2 = 1-egfr.est.cdf$lb
)

```


```{r}
# Results: fit summary for m2 (coefficients suppressed), then the effects table
print(m2, digits = 2, coef = FALSE)

m2r %>%
  kable(escape = FALSE, digits = 4) %>%
  trimws() %>%
  kable_styling(c("striped", "bordered")) %>%
  footnote(
    general_title = "Reference Levels:",
    general = "Risk Allele Category (Low-Risk), Sex (Male), Ethnicity (Hausa/Fulani), Taking Tenofovir? (No), Level of ART Treatment (First Line), Recent Viral Load Count (<=200), Have diabetes? (No), Have hypertension? (No), Have heart failure? (No), Have other comorbid conditions? (No), Taking ACE/ARB? (No), Do you smoke cigarettes? (No), JNC BP Classification (Normal)"
  )
```


```{r}

# Adjusted eGFR estimates by risk category: mean, median, and CDF at 60.
egfr.est.mean %>%
  kable(escape = FALSE, caption = "eGFR Est Mean") %>%
  trimws() %>%
  kable_styling(c("striped", "bordered")) %>%
  footnote(general_title = "", general = "All other variables adjusted at median or mode")

egfr.est.q %>%
  kable(escape = FALSE, caption = "eGFR Est Quantile (Median)") %>%
  trimws() %>%
  kable_styling(c("striped", "bordered")) %>%
  footnote(general_title = "", general = "All other variables adjusted at median or mode")

# Columns 2-5 (est, se, lb, ub as returned by cdf.orm) are the ones shown in bold.
egfr.est.cdf %>%
  kable(escape = FALSE, caption = "eGFR Est CDF (=60)") %>%
  trimws() %>%
  kable_styling(c("striped", "bordered")) %>%
  footnote(general_title = "", general = "All other variables adjusted at median or mode") %>%
  column_spec(2:5, bold = TRUE)

```

Interpretation:

* Bolded - The probability that eGFR is < 60, i.e., a non-normal value (kidney failure)
* Unbolded - The probability that eGFR is > 60, i.e., a normal value

### Serum Creatinine
```{r CPM creat, include=FALSE}

# Analysis set: rows with both a risk-allele category and a serum creatinine value
df2 <- df[!(is.na(df$risk_alleles.f) | is.na(df$serum_creatinine)), ]

# rms needs the data distribution registered before fitting/summarising
dd <- datadist(df2)
options(datadist = "dd")
set.seed(20200407)

# Multiple imputation (20 imputations) of the covariates
a3 <- aregImpute(
  ~ serum_creatinine + risk_alleles.f + age + sex.f + ethnicity.f +
    duration_on_art + tenofovir.f + level_art_trt.f + cd4_sqrt + vl.f +
    dm.f + htn.f + hf.f + others.f + ace_arb.f + smoke.f + mean_bmi + jnc_bp.f,
  data = df2, nk = 3, n.impute = 20
)

# Ordinal (cumulative probability) model fitted across the imputed data sets
m3 <- fit.mult.impute(
  serum_creatinine ~ risk_alleles.f + rcs(age, 3) + sex.f + ethnicity.f +
    rcs(duration_on_art, 3) + tenofovir.f + level_art_trt.f + rcs(cd4_sqrt, 3) +
    vl.f + dm.f + htn.f + hf.f + others.f + ace_arb.f + smoke.f +
    rcs(mean_bmi, 3) + jnc_bp.f,
  fitter = orm, a3, data = df2
)

m3r <- model.est.orm(m3) #[c(9,2:6,8)]

## Hand-built design matrix: one row per risk category, all other covariates fixed.
# Author's notes - medians: age (40), duration (9), cd4 (21.88607), mean_bmi (23.04573)
# Author's notes - modes: Female (1253), Hausa/Fulani (1383), Tenofovir-yes (1047), Level-first line (1563), vl <=200 (1535), dm-no (1856), htn-no (1620), hf-no (1892), others-no (1479), ace_arb-no (1805), smoke-no (1798), jnc_bp-normal (1207)
new.data <- matrix(
  NA,
  nrow = length(levels(df2$risk_alleles.f)),
  ncol = length(m3$Design$colnames),
  dimnames = list(NULL, m3$Design$colnames)
)
new.data[, "risk_alleles.f=High-Risk"] <- c(0, 1)

# Continuous covariates: spline basis evaluated at the median, same in every row.
# The column positions are those of each rcs() term in the design matrix.
spline_cols <- list(age = 2:3, duration_on_art = 8:9, cd4_sqrt = 12:13, mean_bmi = 21:22)
for (v in names(spline_cols)) {
  basis <- rcspline.eval(median(df2[[v]], na.rm = TRUE), knots = m3$Design$parms[[v]], inclx = TRUE)
  new.data[, spline_cols[[v]]] <- matrix(basis, nrow = nrow(new.data), ncol = length(basis), byrow = TRUE)
}

# Categorical covariates: indicator columns set to 1 or 0 for every row
new.data[, c("sex.f=Female", "tenofovir.f=Yes")] <- 1
new.data[, c(
  "ethnicity.f=Igbo", "ethnicity.f=Yoruba", "ethnicity.f=Other",
  "level_art_trt.f=Second Line", "vl.f=>200", "dm.f=Yes", "htn.f=Yes",
  "hf.f=Yes", "others.f=Yes", "ace_arb.f=Yes", "smoke.f=Yes",
  "jnc_bp.f=Pre-hypertension", "jnc_bp.f=Stage 1 Hypertension",
  "jnc_bp.f=Stage 2 Hypertension"
)] <- 0

# Estimates for the two rows of new.data
sc.est.mean <- data.frame(
  risk = c("Low-Risk", "High-Risk"),
  data.frame(mean.orm(m3, new.data, se = TRUE))
)

sc.est.q <- data.frame(
  risk = c("Low-Risk", "High-Risk"),
  data.frame(quantile.orm(m3, new.data, probs = 0.5, se = TRUE))
)

# CDF at 1.2 plus its complement (est2/lb2/ub2 = 1 - est/ub/lb; se2 = se)
sc.est.cdf <- data.frame(cdf.orm(m3, new.data, at.y = 1.2, se = TRUE))
sc.est.cdf <- data.frame(
  risk = c("Low-Risk", "High-Risk"),
  sc.est.cdf,
  est2 = 1 - sc.est.cdf$est,
  se2 = sc.est.cdf$se,
  lb2 = 1 - sc.est.cdf$ub,
  ub2 = 1 - sc.est.cdf$lb
)

```

```{r}
# Results: fit summary for m3 (coefficients suppressed), then the effects table
print(m3, digits = 2, coef = FALSE)

m3r %>%
  kable(escape = FALSE, digits = 4) %>%
  trimws() %>%
  kable_styling(c("striped", "bordered")) %>%
  footnote(
    general_title = "Reference Levels:",
    general = "Risk Allele Category (Low-Risk), Sex (Male), Ethnicity (Hausa/Fulani), Taking Tenofovir? (No), Level of ART Treatment (First Line), Recent Viral Load Count (<=200), Have diabetes? (No), Have hypertension? (No), Have heart failure? (No), Have other comorbid conditions? (No), Taking ACE/ARB? (No), Do you smoke cigarettes? (No), JNC BP Classification (Normal)"
  )

```


```{r}
# Adjusted serum creatinine estimates by risk category: mean, median, CDF at 1.2.
sc.est.mean %>%
  kable(escape = FALSE, caption = "Serum Creatinine Est Mean") %>%
  trimws() %>%
  kable_styling(c("striped", "bordered")) %>%
  footnote(general_title = "", general = "All other variables adjusted at median or mode")

sc.est.q %>%
  kable(escape = FALSE, caption = "Serum Creatinine Est Quantile (Median)") %>%
  trimws() %>%
  kable_styling(c("striped", "bordered")) %>%
  footnote(general_title = "", general = "All other variables adjusted at median or mode")

# Columns 6-9 (est2, se2, lb2, ub2: the complement of the CDF) are shown in bold.
sc.est.cdf %>%
  kable(escape = FALSE, caption = "Serum Creatinine Est CDF (=1.2 mg/dL)") %>%
  trimws() %>%
  kable_styling(c("striped", "bordered")) %>%
  footnote(general_title = "", general = "All other variables adjusted at median or mode") %>%
  column_spec(6:9, bold = TRUE)

```

Interpretation: 

* Unbolded - The probability that the value of serum creatinine is <1.2 (normal)
* Bolded - The probability that the value of serum creatinine is >1.2 (non-normal)

# Secondary Analyses
Risk Alleles as categorical (0,1,2)

```{r}
# Build a formatted effects table (Variable, Level, Low, High, OR, 95% CI,
# P-value) from a fitted model `mod` whose predictors include the 3-level
# risk_alleles_3.f factor.
#
# mod: a fitted model accepted by summary() and anova() that carries a
#      `Design` element with `name` and `label` (as used below).
# Returns (invisibly, as the value of the final assignment) a data.frame with
# one row per effect.
# NOTE(review): the positional `level` vector below must line up with the rows
# that survive the filter; it assumes this exact set and order of predictors.
model.est.orm <- function(mod) {  
  
                  # Effects for fixed contrasts of the continuous covariates and fixed reference levels of the factors
                  sum <- data.frame(summary(mod,age=c(30,40),duration_on_art=c(1,2),cd4_sqrt=c(sqrt(200),sqrt(500)),mean_bmi=c(20,25),risk_alleles_3.f="No alleles",sex.f="Male",ethnicity.f="Hausa/Fulani",tenofovir.f="No",level_art_trt.f="First Line",vl.f="<= 200",dm.f="No",htn.f="No",hf.f="No",others.f="No",ace_arb.f="No",smoke.f="No",jnc_bp.f="Normal"))
                  # Effect and confidence limits formatted to two decimals
                  sum$or <- format(round(sum$Effect,2),nsmall=2,scientific=FALSE,trim=TRUE)
                  sum$lb <- format(round(sum$Lower.0.95,2),nsmall=2,scientific=FALSE,trim=TRUE)
                  #sum$ub <- format(round(sum$Upper.0.95,2),nsmall=2,scientific=FALSE,trim=TRUE)
                  # Upper limit is capped at 100.00 for display
                  sum$ub <- format(round(ifelse(sum$Upper.0.95>=100.00,100.00,sum$Upper.0.95),2),nsmall=2,scientific=FALSE,trim=TRUE)
                  sum$ci <- paste0("(",sum$lb,", ",sum$ub,")")
      
                  # Derive the variable name from the row name; rows whose name starts with "X" get NA
                  # and are filled from the row above (presumably the mangled odds-ratio rows -- confirm).
                  # NOTE(review): the middle "." in the pattern is unescaped, so it matches any character.
                  sum <- data.frame(var = ifelse(startsWith(rownames(sum), "X"),NA,sub("^([^.]*.[^.]*).*", "\\1", rownames(sum))),sum)
                  sum <- fill(sum,"var",.direction="down")
                  # Keep only model predictors and rows with Type == 2 (presumably the exponentiated/odds-ratio rows -- confirm)
                  sum <- sum[sum$var %in% mod$Design$name & sum$Type==2,]
                  rownames(sum) <- c()
                  
                  # Level label per remaining row, in row order; NA marks the four continuous covariates
                  level <- c(NA,NA,NA,NA,"1 allele","2 alleles","Female","Igbo","Yoruba","Other","Yes","Second Line",">200","Yes","Yes","Yes","Yes","Yes","Yes","Pre-hypertension","Stage 1 Hypertension","Stage 2 Hypertension")
                  
                  sum <- data.frame(sum, level)
                  # Low/High contrast values are shown only for rows without a level label
                  sum$Low <- ifelse(is.na(sum$level),round(sum$Low,2),NA)
                  sum$High <- ifelse(is.na(sum$level),round(sum$High,2),NA)
                  sum <- sum[,c("var","level","Low","High","Diff.","or","ci")]
                  
                  
                  # Per-variable p-values from anova(), 4 decimals; values that round to 0.0000 are shown as "< 0.0001"
                  anova <- data.frame(anova(mod))
                  anova$pval <- format(round(anova$P,4),nsmall=4,scientific=FALSE)
                  if(any(anova$pval=='0.0000')) {anova$pval[anova$pval=='0.0000']<-'< 0.0001'}
                  anova$var <- rownames(anova)
                  anova <- anova[rownames(anova) %in% mod$Design$name,c("pval","var")]
                  
                  # Lookup from variable name to its display label
                  labels <- data.frame(var = mod$Design$name, Variable = mod$Design$label)
                  
                  # Combine p-values, effects and labels; the p-value is shown once per variable
                  Table <- data.frame(join(anova,sum,by="var", type="inner"))
                  Table$pval[duplicated(Table$var)] <- NA
                  Table <- data.frame(join(Table, labels,by="var", type= "inner"))
                  
                  # Rename to display column names
                  Table <- setnames(Table, old=c("pval","var","level","Low","High","Diff.","or","ci","Variable"), 
new=c("P-value","var","Level","Low","High","Diff.","OR","95% CI","Variable"))
                  
                  # Final column selection; this assignment is the function's (invisible) return value
                  Table <- Table[,c("Variable","Level","Low","High","OR","95% CI","P-value")]
                  
}
               
               
```

## Associations
### Average uACR
```{r CPM uACR2, include=FALSE}

# Analysis set: rows with both a 3-level risk-allele value and an average uACR
df2 <- df[!(is.na(df$risk_alleles_3.f) | is.na(df$avg_uacr)), ]

# rms needs the data distribution registered before fitting/summarising
dd <- datadist(df2)
options(datadist = "dd")
set.seed(20200407)

# Multiple imputation (20 imputations) of the covariates
a1b <- aregImpute(
  ~ avg_uacr + risk_alleles_3.f + age + sex.f + ethnicity.f +
    duration_on_art + tenofovir.f + level_art_trt.f + cd4_sqrt + vl.f +
    dm.f + htn.f + hf.f + others.f + ace_arb.f + smoke.f + mean_bmi + jnc_bp.f,
  data = df2, nk = 3, n.impute = 20
)

# Ordinal (cumulative probability) model fitted across the imputed data sets
m1b <- fit.mult.impute(
  avg_uacr ~ risk_alleles_3.f + rcs(age, 3) + sex.f + ethnicity.f +
    rcs(duration_on_art, 3) + tenofovir.f + level_art_trt.f + rcs(cd4_sqrt, 3) +
    vl.f + dm.f + htn.f + hf.f + others.f + ace_arb.f + smoke.f +
    rcs(mean_bmi, 3) + jnc_bp.f,
  fitter = orm, a1b, data = df2
)

m1br <- model.est.orm(m1b) #[c(9,2:6,8)]

## Hand-built design matrix: one row per allele count, all other covariates fixed.
# Author's notes - medians: age (40), duration (9), cd4 (21.88607), mean_bmi (23.04573)
# Author's notes - modes: Female (1253), Hausa/Fulani (1383), Tenofovir-yes (1047), Level-first line (1563), vl <=200 (1535), dm-no (1856), htn-no (1620), hf-no (1892), others-no (1479), ace_arb-no (1805), smoke-no (1798), jnc_bp-normal (1207)
new.data <- matrix(
  NA,
  nrow = length(levels(df2$risk_alleles_3.f)),
  ncol = length(m1b$Design$colnames),
  dimnames = list(NULL, m1b$Design$colnames)
)
new.data[, "risk_alleles_3.f=1 allele"] <- c(0, 1, 0)
new.data[, "risk_alleles_3.f=2 alleles"] <- c(0, 0, 1)

# Continuous covariates: spline basis evaluated at the median, same in every row.
# The column positions are those of each rcs() term in the design matrix.
spline_cols <- list(age = 3:4, duration_on_art = 9:10, cd4_sqrt = 13:14, mean_bmi = 22:23)
for (v in names(spline_cols)) {
  basis <- rcspline.eval(median(df2[[v]], na.rm = TRUE), knots = m1b$Design$parms[[v]], inclx = TRUE)
  new.data[, spline_cols[[v]]] <- matrix(basis, nrow = nrow(new.data), ncol = length(basis), byrow = TRUE)
}

# Categorical covariates: indicator columns set to 1 or 0 for every row
new.data[, c("sex.f=Female", "tenofovir.f=Yes")] <- 1
new.data[, c(
  "ethnicity.f=Igbo", "ethnicity.f=Yoruba", "ethnicity.f=Other",
  "level_art_trt.f=Second Line", "vl.f=>200", "dm.f=Yes", "htn.f=Yes",
  "hf.f=Yes", "others.f=Yes", "ace_arb.f=Yes", "smoke.f=Yes",
  "jnc_bp.f=Pre-hypertension", "jnc_bp.f=Stage 1 Hypertension",
  "jnc_bp.f=Stage 2 Hypertension"
)] <- 0

# Estimates for the three rows of new.data
uacr.est.mean.b <- data.frame(
  alleles = c("No alleles", "1 allele", "2 alleles"),
  data.frame(mean.orm(m1b, new.data, se = TRUE))
)

uacr.est.q.b <- data.frame(
  alleles = c("No alleles", "1 allele", "2 alleles"),
  data.frame(quantile.orm(m1b, new.data, probs = 0.5, se = TRUE))
)

# CDF at 30 plus its complement (est2/lb2/ub2 = 1 - est/ub/lb; se2 = se)
uacr.est.cdf1.b <- data.frame(cdf.orm(m1b, new.data, at.y = 30, se = TRUE))
uacr.est.cdf1.b <- data.frame(
  alleles = c("No alleles", "1 allele", "2 alleles"),
  uacr.est.cdf1.b,
  est2 = 1 - uacr.est.cdf1.b$est,
  se2 = uacr.est.cdf1.b$se,
  lb2 = 1 - uacr.est.cdf1.b$ub,
  ub2 = 1 - uacr.est.cdf1.b$lb
)

# CDF at 300 plus its complement
uacr.est.cdf2.b <- data.frame(cdf.orm(m1b, new.data, at.y = 300, se = TRUE))
uacr.est.cdf2.b <- data.frame(
  alleles = c("No alleles", "1 allele", "2 alleles"),
  uacr.est.cdf2.b,
  est2 = 1 - uacr.est.cdf2.b$est,
  se2 = uacr.est.cdf2.b$se,
  lb2 = 1 - uacr.est.cdf2.b$ub,
  ub2 = 1 - uacr.est.cdf2.b$lb
)

```

```{r}
# Results: fit summary for m1b (coefficients suppressed), then the effects table
print(m1b, digits = 2, coef = FALSE)

m1br %>%
  kable(escape = FALSE, digits = 4) %>%
  trimws() %>%
  kable_styling(c("striped", "bordered")) %>%
  footnote(
    general_title = "Reference Levels:",
    general = "Risk Alleles (No alleles), Sex (Male), Ethnicity (Hausa/Fulani), Taking Tenofovir? (No), Level of ART Treatment (First Line), Recent Viral Load Count (<=200), Have diabetes? (No), Have hypertension? (No), Have heart failure? (No), Have other comorbid conditions? (No), Taking ACE/ARB? (No), Do you smoke cigarettes? (No), JNC BP Classification (Normal)"
  )
```

```{r}
# Adjusted uACR estimates by allele count: mean, median, CDF at 30 and at 300.
uacr.est.mean.b %>%
  kable(escape = FALSE, caption = "uACR Est Mean") %>%
  trimws() %>%
  kable_styling(c("striped", "bordered")) %>%
  footnote(general_title = "", general = "All other variables adjusted at median or mode")

uacr.est.q.b %>%
  kable(escape = FALSE, caption = "uACR Est Quantile (Median)") %>%
  trimws() %>%
  kable_styling(c("striped", "bordered")) %>%
  footnote(general_title = "", general = "All other variables adjusted at median or mode")

# Columns 6-9 (est2, se2, lb2, ub2: the complement of the CDF) are shown in bold.
uacr.est.cdf1.b %>%
  kable(escape = FALSE, caption = "uACR Est CDF (=30)") %>%
  trimws() %>%
  kable_styling(c("striped", "bordered")) %>%
  footnote(general_title = "", general = "All other variables adjusted at median or mode") %>%
  column_spec(6:9, bold = TRUE)

uacr.est.cdf2.b %>%
  kable(escape = FALSE, caption = "uACR Est CDF (=300)") %>%
  trimws() %>%
  kable_styling(c("striped", "bordered")) %>%
  footnote(general_title = "", general = "All other variables adjusted at median or mode") %>%
  column_spec(6:9, bold = TRUE)

```


### eGFR
```{r CPM eGFR2, include=FALSE}

# Analysis set: rows with both a 3-level risk-allele value and an eGFR value
df2 <- df[!(is.na(df$risk_alleles_3.f) | is.na(df$egfr)), ]

# rms needs the data distribution registered before fitting/summarising
dd <- datadist(df2)
options(datadist = "dd")
set.seed(20200407)

# Multiple imputation (20 imputations) of the covariates
a2b <- aregImpute(
  ~ egfr + risk_alleles_3.f + age + sex.f + ethnicity.f +
    duration_on_art + tenofovir.f + level_art_trt.f + cd4_sqrt + vl.f +
    dm.f + htn.f + hf.f + others.f + ace_arb.f + smoke.f + mean_bmi + jnc_bp.f,
  data = df2, nk = 3, n.impute = 20
)

# Ordinal (cumulative probability) model fitted across the imputed data sets
m2b <- fit.mult.impute(
  egfr ~ risk_alleles_3.f + rcs(age, 3) + sex.f + ethnicity.f +
    rcs(duration_on_art, 3) + tenofovir.f + level_art_trt.f + rcs(cd4_sqrt, 3) +
    vl.f + dm.f + htn.f + hf.f + others.f + ace_arb.f + smoke.f +
    rcs(mean_bmi, 3) + jnc_bp.f,
  fitter = orm, a2b, data = df2
)

m2br <- model.est.orm(m2b) #[c(9,2:6,8)]

## Hand-built design matrix: one row per allele count, all other covariates fixed.
# Author's notes - medians: age (40), duration (9), cd4 (21.88607), mean_bmi (23.04573)
# Author's notes - modes: Female (1253), Hausa/Fulani (1383), Tenofovir-yes (1047), Level-first line (1563), vl <=200 (1535), dm-no (1856), htn-no (1620), hf-no (1892), others-no (1479), ace_arb-no (1805), smoke-no (1798), jnc_bp-normal (1207)
new.data <- matrix(
  NA,
  nrow = length(levels(df2$risk_alleles_3.f)),
  ncol = length(m2b$Design$colnames),
  dimnames = list(NULL, m2b$Design$colnames)
)
new.data[, "risk_alleles_3.f=1 allele"] <- c(0, 1, 0)
new.data[, "risk_alleles_3.f=2 alleles"] <- c(0, 0, 1)

# Continuous covariates: spline basis evaluated at the median, same in every row.
# The column positions are those of each rcs() term in the design matrix.
spline_cols <- list(age = 3:4, duration_on_art = 9:10, cd4_sqrt = 13:14, mean_bmi = 22:23)
for (v in names(spline_cols)) {
  basis <- rcspline.eval(median(df2[[v]], na.rm = TRUE), knots = m2b$Design$parms[[v]], inclx = TRUE)
  new.data[, spline_cols[[v]]] <- matrix(basis, nrow = nrow(new.data), ncol = length(basis), byrow = TRUE)
}

# Categorical covariates: indicator columns set to 1 or 0 for every row
new.data[, c("sex.f=Female", "tenofovir.f=Yes")] <- 1
new.data[, c(
  "ethnicity.f=Igbo", "ethnicity.f=Yoruba", "ethnicity.f=Other",
  "level_art_trt.f=Second Line", "vl.f=>200", "dm.f=Yes", "htn.f=Yes",
  "hf.f=Yes", "others.f=Yes", "ace_arb.f=Yes", "smoke.f=Yes",
  "jnc_bp.f=Pre-hypertension", "jnc_bp.f=Stage 1 Hypertension",
  "jnc_bp.f=Stage 2 Hypertension"
)] <- 0

# Estimates for the three rows of new.data
egfr.est.mean.b <- data.frame(
  alleles = c("No alleles", "1 allele", "2 alleles"),
  data.frame(mean.orm(m2b, new.data, se = TRUE))
)

egfr.est.q.b <- data.frame(
  alleles = c("No alleles", "1 allele", "2 alleles"),
  data.frame(quantile.orm(m2b, new.data, probs = 0.5, se = TRUE))
)

# CDF at 60 plus its complement (est2/lb2/ub2 = 1 - est/ub/lb; se2 = se)
egfr.est.cdf.b <- data.frame(cdf.orm(m2b, new.data, at.y = 60, se = TRUE))
egfr.est.cdf.b <- data.frame(
  alleles = c("No alleles", "1 allele", "2 alleles"),
  egfr.est.cdf.b,
  est2 = 1 - egfr.est.cdf.b$est,
  se2 = egfr.est.cdf.b$se,
  lb2 = 1 - egfr.est.cdf.b$ub,
  ub2 = 1 - egfr.est.cdf.b$lb
)

```

```{r}
# Results: fit summary for m2b (coefficients suppressed), then the effects table
print(m2b, digits = 2, coef = FALSE)

m2br %>%
  kable(escape = FALSE, digits = 4) %>%
  trimws() %>%
  kable_styling(c("striped", "bordered")) %>%
  footnote(
    general_title = "Reference Levels:",
    general = "Risk Alleles (No alleles), Sex (Male), Ethnicity (Hausa/Fulani), Taking Tenofovir? (No), Level of ART Treatment (First Line), Recent Viral Load Count (<=200), Have diabetes? (No), Have hypertension? (No), Have heart failure? (No), Have other comorbid conditions? (No), Taking ACE/ARB? (No), Do you smoke cigarettes? (No), JNC BP Classification (Normal)"
  )
```

```{r}
# Adjusted eGFR estimates by allele count: mean, median, and CDF at 60.
egfr.est.mean.b %>%
  kable(escape = FALSE, caption = "eGFR Est Mean") %>%
  trimws() %>%
  kable_styling(c("striped", "bordered")) %>%
  footnote(general_title = "", general = "All other variables adjusted at median or mode")

egfr.est.q.b %>%
  kable(escape = FALSE, caption = "eGFR Est Quantile (Median)") %>%
  trimws() %>%
  kable_styling(c("striped", "bordered")) %>%
  footnote(general_title = "", general = "All other variables adjusted at median or mode")

# Bold columns 2-5 (est, se, lb, ub), matching the primary eGFR CDF table.
# For eGFR the low side is the non-normal one, so it is the CDF columns, not
# the complement columns 6-9 (est2..ub2) bolded in the uACR and serum
# creatinine tables, that carry the probability of interest.
egfr.est.cdf.b %>%
  kable(escape = FALSE, caption = "eGFR Est CDF (=60)") %>%
  trimws() %>%
  kable_styling(c("striped", "bordered")) %>%
  footnote(general_title = "", general = "All other variables adjusted at median or mode") %>%
  column_spec(2:5, bold = TRUE)

```


### Serum Creatinine
```{r CPM creat2, include=FALSE}

# Analysis set: rows with both a 3-level risk-allele value and a serum creatinine value
df2 <- df[!(is.na(df$risk_alleles_3.f) | is.na(df$serum_creatinine)), ]

# rms needs the data distribution registered before fitting/summarising
dd <- datadist(df2)
options(datadist = "dd")
set.seed(20200407)

# Multiple imputation (20 imputations) of the covariates
a3b <- aregImpute(
  ~ serum_creatinine + risk_alleles_3.f + age + sex.f + ethnicity.f +
    duration_on_art + tenofovir.f + level_art_trt.f + cd4_sqrt + vl.f +
    dm.f + htn.f + hf.f + others.f + ace_arb.f + smoke.f + mean_bmi + jnc_bp.f,
  data = df2, nk = 3, n.impute = 20
)

# Ordinal (cumulative probability) model fitted across the imputed data sets
m3b <- fit.mult.impute(
  serum_creatinine ~ risk_alleles_3.f + rcs(age, 3) + sex.f + ethnicity.f +
    rcs(duration_on_art, 3) + tenofovir.f + level_art_trt.f + rcs(cd4_sqrt, 3) +
    vl.f + dm.f + htn.f + hf.f + others.f + ace_arb.f + smoke.f +
    rcs(mean_bmi, 3) + jnc_bp.f,
  fitter = orm, a3b, data = df2
)

m3br <- model.est.orm(m3b) #[c(9,2:6,8)]

## Hand-built design matrix: one row per allele count, all other covariates fixed.
# Author's notes - medians: age (40), duration (9), cd4 (21.88607), mean_bmi (23.04573)
# Author's notes - modes: Female (1253), Hausa/Fulani (1383), Tenofovir-yes (1047), Level-first line (1563), vl <=200 (1535), dm-no (1856), htn-no (1620), hf-no (1892), others-no (1479), ace_arb-no (1805), smoke-no (1798), jnc_bp-normal (1207)
new.data <- matrix(
  NA,
  nrow = length(levels(df2$risk_alleles_3.f)),
  ncol = length(m3b$Design$colnames),
  dimnames = list(NULL, m3b$Design$colnames)
)
new.data[, "risk_alleles_3.f=1 allele"] <- c(0, 1, 0)
new.data[, "risk_alleles_3.f=2 alleles"] <- c(0, 0, 1)

# Continuous covariates: spline basis evaluated at the median, same in every row.
# The column positions are those of each rcs() term in the design matrix.
spline_cols <- list(age = 3:4, duration_on_art = 9:10, cd4_sqrt = 13:14, mean_bmi = 22:23)
for (v in names(spline_cols)) {
  basis <- rcspline.eval(median(df2[[v]], na.rm = TRUE), knots = m3b$Design$parms[[v]], inclx = TRUE)
  new.data[, spline_cols[[v]]] <- matrix(basis, nrow = nrow(new.data), ncol = length(basis), byrow = TRUE)
}

# Categorical covariates: indicator columns set to 1 or 0 for every row
new.data[, c("sex.f=Female", "tenofovir.f=Yes")] <- 1
new.data[, c(
  "ethnicity.f=Igbo", "ethnicity.f=Yoruba", "ethnicity.f=Other",
  "level_art_trt.f=Second Line", "vl.f=>200", "dm.f=Yes", "htn.f=Yes",
  "hf.f=Yes", "others.f=Yes", "ace_arb.f=Yes", "smoke.f=Yes",
  "jnc_bp.f=Pre-hypertension", "jnc_bp.f=Stage 1 Hypertension",
  "jnc_bp.f=Stage 2 Hypertension"
)] <- 0

# Estimates for the three rows of new.data
sc.est.mean.b <- data.frame(
  alleles = c("No alleles", "1 allele", "2 alleles"),
  data.frame(mean.orm(m3b, new.data, se = TRUE))
)

sc.est.q.b <- data.frame(
  alleles = c("No alleles", "1 allele", "2 alleles"),
  data.frame(quantile.orm(m3b, new.data, probs = 0.5, se = TRUE))
)

# CDF at 1.2 plus its complement (est2/lb2/ub2 = 1 - est/ub/lb; se2 = se)
sc.est.cdf.b <- data.frame(cdf.orm(m3b, new.data, at.y = 1.2, se = TRUE))
sc.est.cdf.b <- data.frame(
  alleles = c("No alleles", "1 allele", "2 alleles"),
  sc.est.cdf.b,
  est2 = 1 - sc.est.cdf.b$est,
  se2 = sc.est.cdf.b$se,
  lb2 = 1 - sc.est.cdf.b$ub,
  ub2 = 1 - sc.est.cdf.b$lb
)

```

```{r}
# Results: fit summary for m3b (coefficients suppressed), then the effects table
print(m3b, digits = 2, coef = FALSE)

m3br %>%
  kable(escape = FALSE, digits = 4) %>%
  trimws() %>%
  kable_styling(c("striped", "bordered")) %>%
  footnote(
    general_title = "Reference Levels:",
    general = "Risk Alleles (No alleles), Sex (Male), Ethnicity (Hausa/Fulani), Taking Tenofovir? (No), Level of ART Treatment (First Line), Recent Viral Load Count (<=200), Have diabetes? (No), Have hypertension? (No), Have heart failure? (No), Have other comorbid conditions? (No), Taking ACE/ARB? (No), Do you smoke cigarettes? (No), JNC BP Classification (Normal)"
  )
```

```{r}
# Adjusted serum creatinine estimates by allele count: mean, median, CDF at 1.2.
sc.est.mean.b %>%
  kable(escape = FALSE, caption = "Serum Creatinine Est Mean") %>%
  trimws() %>%
  kable_styling(c("striped", "bordered")) %>%
  footnote(general_title = "", general = "All other variables adjusted at median or mode")

sc.est.q.b %>%
  kable(escape = FALSE, caption = "Serum Creatinine Est Quantile (Median)") %>%
  trimws() %>%
  kable_styling(c("striped", "bordered")) %>%
  footnote(general_title = "", general = "All other variables adjusted at median or mode")

# Columns 6-9 (est2, se2, lb2, ub2: the complement of the CDF) are shown in bold.
sc.est.cdf.b %>%
  kable(escape = FALSE, caption = "Serum Creatinine Est CDF (=1.2 mg/dL)") %>%
  trimws() %>%
  kable_styling(c("striped", "bordered")) %>%
  footnote(general_title = "", general = "All other variables adjusted at median or mode") %>%
  column_spec(6:9, bold = TRUE)

```


## Logistic Regressions
```{r LR setup, include=FALSE}
# Binary outcomes for the logistic models: 1 = beyond the threshold, 0 = not,
# NA where the underlying measurement is missing.
df$uacr_30 <- ifelse(is.na(df$avg_uacr), NA, ifelse(df$avg_uacr > 30, 1, 0))
df$uacr_300 <- ifelse(is.na(df$avg_uacr), NA, ifelse(df$avg_uacr > 300, 1, 0))
df$egfr_60 <- ifelse(is.na(df$egfr), NA, ifelse(df$egfr < 60, 1, 0))

# Build a formatted effects table (Variable, Level, Low, High, OR, 95% CI,
# P-value) from a fitted model `mod` whose predictors include the 2-level
# risk_alleles.f factor.
#
# mod: a fitted model accepted by summary() and anova() that carries a
#      `Design` element with `name` and `label` (as used below).
# Returns (invisibly, as the value of the final assignment) a data.frame with
# one row per effect.
# NOTE(review): the positional `level` vector below must line up with the rows
# that survive the filter; it assumes this exact set and order of predictors.
model.est.logistic <- function(mod){ 
                  # Effects for fixed contrasts of the continuous covariates and fixed reference levels of the factors
                  sum <- data.frame(summary(mod,age=c(30,40),duration_on_art=c(1,2),cd4_sqrt=c(sqrt(200),sqrt(500)),mean_bmi=c(20,25),risk_alleles.f="Low-Risk",sex.f="Male",ethnicity.f="Hausa/Fulani",tenofovir.f="No",level_art_trt.f="First Line",vl.f="<= 200",dm.f="No",htn.f="No",hf.f="No",others.f="No",ace_arb.f="No",smoke.f="No",jnc_bp.f="Normal"))
                  # Effect and confidence limits formatted to two decimals
                  sum$or <- format(round(sum$Effect,2),nsmall=2,scientific=FALSE,trim=TRUE)
                  sum$lb <- format(round(sum$Lower.0.95,2),nsmall=2,scientific=FALSE,trim=TRUE)
                  #sum$ub <- format(round(sum$Upper.0.95,2),nsmall=2,scientific=FALSE,trim=TRUE)
                  # Upper limit is capped at 100.00 for display
                  sum$ub <- format(round(ifelse(sum$Upper.0.95>=100.00,100.00,sum$Upper.0.95),2),nsmall=2,scientific=FALSE,trim=TRUE)
                  sum$ci <- paste0("(",sum$lb,", ",sum$ub,")")
      
                  # Derive the variable name from the row name; rows whose name starts with "X" get NA
                  # and are filled from the row above (presumably the mangled odds-ratio rows -- confirm).
                  # NOTE(review): the middle "." in the pattern is unescaped, so it matches any character.
                  sum <- data.frame(var = ifelse(startsWith(rownames(sum), "X"),NA,sub("^([^.]*.[^.]*).*", "\\1", rownames(sum))),sum)
                  sum <- fill(sum,"var",.direction="down")
                  # Keep only model predictors and rows with Type == 2 (presumably the exponentiated/odds-ratio rows -- confirm)
                  sum <- sum[sum$var %in% mod$Design$name & sum$Type==2,]
                  rownames(sum) <- c()
                  
                  # Level label per remaining row, in row order; NA marks the four continuous covariates
                  level <- c(NA,NA,NA,NA,"High-Risk","Female","Igbo","Yoruba","Other","Yes","Second Line",">200","Yes","Yes","Yes","Yes","Yes","Yes","Pre-hypertension","Stage 1 Hypertension","Stage 2 Hypertension")
                  
                  sum <- data.frame(sum, level)
                  # Low/High contrast values are shown only for rows without a level label
                  sum$Low <- ifelse(is.na(sum$level),round(sum$Low,2),NA)
                  sum$High <- ifelse(is.na(sum$level),round(sum$High,2),NA)
                  sum <- sum[,c("var","level","Low","High","Diff.","or","ci")]
                  
                  
                  # Per-variable p-values from anova(), 4 decimals; values that round to 0.0000 are shown as "< 0.0001"
                  anova <- data.frame(anova(mod))
                  anova$pval <- format(round(anova$P,4),nsmall=4,scientific=FALSE)
                  if(any(anova$pval=='0.0000')) {anova$pval[anova$pval=='0.0000']<-'< 0.0001'}
                  anova$var <- rownames(anova)
                  anova <- anova[rownames(anova) %in% mod$Design$name,c("pval","var")]
                  
                  # Lookup from variable name to its display label
                  labels <- data.frame(var = mod$Design$name, Variable = mod$Design$label)
                  
                  # Combine p-values, effects and labels; the p-value is shown once per variable
                  Table <- data.frame(join(anova,sum,by="var", type="inner"))
                  Table$pval[duplicated(Table$var)] <- NA
                  Table <- data.frame(join(Table, labels,by="var", type= "inner"))
                  
                  # Rename to display column names
                  Table <- setnames(Table, old=c("pval","var","level","Low","High","Diff.","or","ci","Variable"), 
new=c("P-value","var","Level","Low","High","Diff.","OR","95% CI","Variable"))
                  
                  # Final column selection; this assignment is the function's (invisible) return value
                  Table <- Table[,c("Variable","Level","Low","High","OR","95% CI","P-value")]
                  #Table$Level <- ifelse(is.na(Table$Level),Table$`Diff.`,Table$Level)
               }

```

### Avg uACR > 30
```{r avg uacr 30 lr, warning=F, include=FALSE}

# Analysis set: rows with both a risk-allele category and the uACR > 30 indicator
df2 <- df[!(is.na(df$risk_alleles.f) | is.na(df$uacr_30)), ]

# rms needs the data distribution registered before fitting/summarising
dd <- datadist(df2)
options(datadist = "dd")
set.seed(20200407)

# Multiple imputation (20 imputations) of the covariates
a4 <- aregImpute(
  ~ uacr_30 + risk_alleles.f + age + sex.f + ethnicity.f +
    duration_on_art + tenofovir.f + level_art_trt.f + cd4_sqrt + vl.f +
    dm.f + htn.f + hf.f + others.f + ace_arb.f + smoke.f + mean_bmi + jnc_bp.f,
  data = df2, nk = 3, n.impute = 20
)

# Logistic model fitted across the imputed data sets
m4 <- fit.mult.impute(
  uacr_30 ~ risk_alleles.f + rcs(age, 3) + sex.f + ethnicity.f +
    rcs(duration_on_art, 3) + tenofovir.f + level_art_trt.f + rcs(cd4_sqrt, 3) +
    vl.f + dm.f + htn.f + hf.f + others.f + ace_arb.f + smoke.f +
    rcs(mean_bmi, 3) + jnc_bp.f,
  fitter = lrm, xtrans = a4, data = df2
)

m4r <- model.est.logistic(m4) #[c(9,2,5:6,8)]

```

```{r}
# Results: fit summary for m4 (coefficients suppressed), then the effects table
print(m4, digits = 2, coef = FALSE)

m4r %>%
  kable(escape = FALSE, digits = 4) %>%
  trimws() %>%
  kable_styling(c("striped", "bordered")) %>%
  footnote(
    general_title = "Reference Levels:",
    general = "Risk Allele Category (Low-Risk), Sex (Male), Ethnicity (Hausa/Fulani), Taking Tenofovir? (No), Level of ART Treatment (First Line), Recent Viral Load Count (<=200), Have diabetes? (No), Have hypertension? (No), Have heart failure? (No), Have other comorbid conditions? (No), Taking ACE/ARB? (No), Do you smoke cigarettes? (No), JNC BP Classification (Normal)"
  )
```

#### Propensity
```{r}
# Return one completed data set: `data` with its missing values replaced by
# the `im`-th set of imputations stored in the aregImpute object `impute`.
fill_data <- function(impute = a4, data = df2, im = 1) {
  cbind.data.frame(impute.transcan(x = impute,
                                   imputation = im,
                                   data = data,
                                   list.out = TRUE,
                                   pr = FALSE))
}

# One estimate (and its variance) per imputation; sized from the imputation
# object rather than a hard-coded count.
n.imp <- a4$n.impute
est <- var.est <- numeric(n.imp)

for (i in seq_len(n.imp)) {
  full_dat <- fill_data(im = i)
  # Propensity model for the risk-allele category given the covariates
  m <- lrm(risk_alleles.f ~ rcs(age,3) + sex.f + ethnicity.f + rcs(duration_on_art,3) + tenofovir.f + level_art_trt.f + rcs(cd4_sqrt,3) + vl.f + dm.f + htn.f + hf.f + others.f + ace_arb.f + smoke.f + rcs(mean_bmi,3) + jnc_bp.f, data=full_dat)
  # predict() with its defaults; stored as the propensity covariate
  full_dat$pslp <- predict(m)
  # Outcome model adjusted for the propensity covariate only
  m_a <- glm(uacr_30 ~ risk_alleles.f + pslp, data=full_dat, family="binomial")
  coef_tab <- summary(m_a)$coefficients
  est[i] <- coef_tab[2, 1]        # risk_alleles.f coefficient
  var.est[i] <- coef_tab[2, 2]^2  # its squared standard error
}

variable <- "Risk Allele Category"
level <- "High-Risk"

# Pool across imputations (Rubin's rules): within-imputation variance plus
# between-imputation variance inflated by (1 + 1/m).
pooled.est <- mean(est)
pooled.se <- sqrt(mean(var.est) + var(est) * (1 + 1/n.imp))
pooled.or <- exp(pooled.est)
pooled.lb <- exp(pooled.est - 1.96*pooled.se)
pooled.ub <- exp(pooled.est + 1.96*pooled.se)
# Two-sided p-value; -abs() keeps it in [0, 1] when the estimate is negative
pooled.pval <- 2 * pnorm(-abs(pooled.est/pooled.se))

pooled <- data.frame(
  variable,
  level,
  pooled.est = format(round(pooled.est,2),nsmall=2,scientific=FALSE),
  pooled.se = format(round(pooled.se,2),nsmall=2,scientific=FALSE),
  pooled.or = format(round(pooled.or,2),nsmall=2,scientific=FALSE),
  pooled.ci = paste0("(",format(round(pooled.lb,2),nsmall=2,scientific=FALSE),", ",format(round(pooled.ub,2),nsmall=2,scientific=FALSE),")"),
  pooled.pval = format(round(pooled.pval,4),nsmall=4,scientific=FALSE)
)

pooled <- setnames(pooled,
                   old=c("variable","level","pooled.est","pooled.se","pooled.or",
                         "pooled.ci","pooled.pval"),
                   new=c("Variable","Level","Estimate","Standard Error","OR","95% CI","P-value"))

# Display everything except the log-odds estimate and its standard error
pooled[,!names(pooled) %in% c("Estimate","Standard Error")] %>%
  kable(escape = FALSE, digits = 4) %>%
  trimws() %>%
  kable_styling(c("striped","bordered")) %>%
  footnote(general_title = "Reference Levels:", general = "Risk Allele Category (Low-Risk)")
```


### Avg uACR > 300
```{r avg uacr 300 lr, warning=F, include=FALSE}
# Analysis set: rows with both a risk-allele category and the uACR > 300 indicator
df2 <- df[!(is.na(df$risk_alleles.f) | is.na(df$uacr_300)), ]

# rms needs the data distribution registered before fitting/summarising
dd <- datadist(df2)
options(datadist = "dd")
set.seed(20200407)

# Multiple imputation (20 imputations) of the covariates
a5 <- aregImpute(
  ~ uacr_300 + risk_alleles.f + age + sex.f + ethnicity.f +
    duration_on_art + tenofovir.f + level_art_trt.f + cd4_sqrt + vl.f +
    dm.f + htn.f + hf.f + others.f + ace_arb.f + smoke.f + mean_bmi + jnc_bp.f,
  data = df2, nk = 3, n.impute = 20
)

# Logistic model fitted across the imputed data sets
m5 <- fit.mult.impute(
  uacr_300 ~ risk_alleles.f + rcs(age, 3) + sex.f + ethnicity.f +
    rcs(duration_on_art, 3) + tenofovir.f + level_art_trt.f + rcs(cd4_sqrt, 3) +
    vl.f + dm.f + htn.f + hf.f + others.f + ace_arb.f + smoke.f +
    rcs(mean_bmi, 3) + jnc_bp.f,
  fitter = lrm, xtrans = a5, data = df2
)

m5r <- model.est.logistic(m5) #[c(9,2:6,8)]

```

```{r}
# Results: model-level summary for the uACR > 300 fit, with the coefficient
# table suppressed.
print(m5, digits = 2, coef = FALSE)

# Styled table of the estimates; the footnote lists each factor's reference
# level.
m5r %>%
  kable(escape = FALSE, digits = 4) %>%
  trimws() %>%
  kable_styling(c("striped", "bordered")) %>%
  footnote(
    general_title = "Reference Levels:",
    general = "Risk Allele Category (Low-Risk), Sex (Male), Ethnicity (Hausa/Fulani), Taking Tenofovir? (No), Level of ART Treatment (First Line), Recent Viral Load Count (<=200), Have diabetes? (No), Have hypertension? (No), Have heart failure? (No), Have other comorbid conditions? (No), Taking ACE/ARB? (No), Do you smoke cigarettes? (No), JNC BP Classification (Normal)"
  )
```

#### Propensity
```{r}
# Propensity-adjusted effect of risk-allele category on uACR > 300, estimated
# within each completed (imputed) data set and then pooled across imputations.

# Return one completed copy of `data` in which missing values are filled in
# from imputation number `im` of the aregImpute object `impute`.
fill_data <- function(impute = a5, data = df2, im = 1) {
  cbind.data.frame(impute.transcan(x = impute,
                                   imputation = im,
                                   data = data,
                                   list.out = TRUE,
                                   pr = FALSE))
}

# Number of imputations, read from the imputation object so the loop and the
# pooled variance below always agree with how a5 was built.
n_imp <- a5$n.impute

# Per-imputation log-odds estimate and its squared standard error.
est <- var.est <- numeric(n_imp)

for (i in seq_len(n_imp)) {
  full_dat <- fill_data(im = i)

  # Propensity model: risk-allele category given the covariates.
  m <- lrm(risk_alleles.f ~ rcs(age, 3) + sex.f + ethnicity.f +
             rcs(duration_on_art, 3) + tenofovir.f + level_art_trt.f +
             rcs(cd4_sqrt, 3) + vl.f + dm.f + htn.f + hf.f + others.f +
             ace_arb.f + smoke.f + rcs(mean_bmi, 3) + jnc_bp.f,
           data = full_dat)

  # Linear predictor of the propensity model, used as the adjustment term.
  full_dat$pslp <- predict(m)

  # Outcome model adjusted for the propensity linear predictor.
  m_a <- glm(uacr_300 ~ risk_alleles.f + pslp, data = full_dat,
             family = "binomial")

  # Row 2 of the coefficient table is the risk_alleles.f term
  # (row 1 is the intercept).
  coef_tab <- coef(summary(m_a))
  est[i] <- coef_tab[2, 1]
  var.est[i] <- coef_tab[2, 2]^2
}

variable <- "Risk Allele Category"
level <- "High-Risk"

# Pool across imputations: mean estimate; total variance is the mean
# within-imputation variance plus (1 + 1/m) times the between-imputation
# variance.
pooled.est <- mean(est)
pooled.se <- sqrt(mean(var.est) + var(est) * (1 + 1 / n_imp))
pooled.or <- exp(pooled.est)
pooled.lb <- exp(pooled.est - 1.96 * pooled.se)
pooled.ub <- exp(pooled.est + 1.96 * pooled.se)

# Two-sided normal p-value. abs() is required: without it a negative pooled
# estimate yields a value greater than 1.
pooled.pval <- 2 * pnorm(-abs(pooled.est / pooled.se))

pooled <- data.frame(
  variable,
  level,
  pooled.est = format(round(pooled.est, 2), nsmall = 2, scientific = FALSE),
  pooled.se = format(round(pooled.se, 2), nsmall = 2, scientific = FALSE),
  pooled.or = format(round(pooled.or, 2), nsmall = 2, scientific = FALSE),
  pooled.ci = paste0(
    "(", format(round(pooled.lb, 2), nsmall = 2, scientific = FALSE),
    ", ", format(round(pooled.ub, 2), nsmall = 2, scientific = FALSE), ")"
  ),
  pooled.pval = format(round(pooled.pval, 4), nsmall = 4, scientific = FALSE)
)

pooled <- setnames(pooled,
                   old = c("variable", "level", "pooled.est", "pooled.se",
                           "pooled.or", "pooled.ci", "pooled.pval"),
                   new = c("Variable", "Level", "Estimate", "Standard Error",
                           "OR", "95% CI", "P-value"))

# Report the odds ratio, CI and p-value only; the log-odds estimate and its
# standard error are left out of the displayed table.
pooled[, !names(pooled) %in% c("Estimate", "Standard Error")] %>%
  kable(escape = FALSE, digits = 4) %>%
  trimws() %>%
  kable_styling(c("striped", "bordered")) %>%
  footnote(general_title = "Reference Levels:",
           general = "Risk Allele Category (Low-Risk)")
```


### eGFR < 60
```{r egfr lr, warning=F, include=FALSE}

# Analysis set for the eGFR < 60 outcome: drop rows where either the
# risk-allele category or the outcome is missing.
df2 <- df[!(is.na(df$risk_alleles.f) | is.na(df$egfr_60)), ]

# Covariate distribution summaries, registered for rms via options(datadist).
dd <- datadist(df2)
options(datadist = "dd")

# Fixed seed so the imputations below are reproducible.
set.seed(20200407)

# Multiple imputation (20 imputations, 3 knots) of outcome, exposure and
# covariates.
a6 <- aregImpute(
  ~ egfr_60 + risk_alleles.f + age + sex.f + ethnicity.f +
    duration_on_art + tenofovir.f + level_art_trt.f + cd4_sqrt + vl.f +
    dm.f + htn.f + hf.f + others.f + ace_arb.f + smoke.f + mean_bmi +
    jnc_bp.f,
  data = df2, nk = 3, n.impute = 20
)

# Logistic model fitted on every completed data set and combined by
# fit.mult.impute; continuous covariates enter as 3-knot restricted cubic
# splines.
m6 <- fit.mult.impute(
  egfr_60 ~ risk_alleles.f + rcs(age, 3) + sex.f + ethnicity.f +
    rcs(duration_on_art, 3) + tenofovir.f + level_art_trt.f +
    rcs(cd4_sqrt, 3) + vl.f + dm.f + htn.f + hf.f + others.f +
    ace_arb.f + smoke.f + rcs(mean_bmi, 3) + jnc_bp.f,
  fitter = lrm, xtrans = a6, data = df2
)

# Table of estimates built by the file's own helper
# (column subset [c(9,2:6,8)] currently disabled).
m6r <- model.est.logistic(m6)

```

```{r}
# Results: model-level summary for the eGFR < 60 fit, with the coefficient
# table suppressed.
print(m6, digits = 2, coef = FALSE)

# Styled table of the estimates; the footnote lists each factor's reference
# level.
m6r %>%
  kable(escape = FALSE, digits = 4) %>%
  trimws() %>%
  kable_styling(c("striped", "bordered")) %>%
  footnote(
    general_title = "Reference Levels:",
    general = "Risk Allele Category (Low-Risk), Sex (Male), Ethnicity (Hausa/Fulani), Taking Tenofovir? (No), Level of ART Treatment (First Line), Recent Viral Load Count (<=200), Have diabetes? (No), Have hypertension? (No), Have heart failure? (No), Have other comorbid conditions? (No), Taking ACE/ARB? (No), Do you smoke cigarettes? (No), JNC BP Classification (Normal)"
  )
```

#### Propensity
```{r}
# Propensity-adjusted effect of risk-allele category on eGFR < 60, estimated
# within each completed (imputed) data set and then pooled across imputations.

# Return one completed copy of `data` in which missing values are filled in
# from imputation number `im` of the aregImpute object `impute`.
fill_data <- function(impute = a6, data = df2, im = 1) {
  cbind.data.frame(impute.transcan(x = impute,
                                   imputation = im,
                                   data = data,
                                   list.out = TRUE,
                                   pr = FALSE))
}

# Number of imputations, read from the imputation object so the loop and the
# pooled variance below always agree with how a6 was built.
n_imp <- a6$n.impute

# Per-imputation log-odds estimate and its squared standard error.
est <- var.est <- numeric(n_imp)

for (i in seq_len(n_imp)) {
  full_dat <- fill_data(im = i)

  # Propensity model: risk-allele category given the covariates.
  m <- lrm(risk_alleles.f ~ rcs(age, 3) + sex.f + ethnicity.f +
             rcs(duration_on_art, 3) + tenofovir.f + level_art_trt.f +
             rcs(cd4_sqrt, 3) + vl.f + dm.f + htn.f + hf.f + others.f +
             ace_arb.f + smoke.f + rcs(mean_bmi, 3) + jnc_bp.f,
           data = full_dat)

  # Linear predictor of the propensity model, used as the adjustment term.
  full_dat$pslp <- predict(m)

  # Outcome model adjusted for the propensity linear predictor.
  m_a <- glm(egfr_60 ~ risk_alleles.f + pslp, data = full_dat,
             family = "binomial")

  # Row 2 of the coefficient table is the risk_alleles.f term
  # (row 1 is the intercept).
  coef_tab <- coef(summary(m_a))
  est[i] <- coef_tab[2, 1]
  var.est[i] <- coef_tab[2, 2]^2
}

variable <- "Risk Allele Category"
level <- "High-Risk"

# Pool across imputations: mean estimate; total variance is the mean
# within-imputation variance plus (1 + 1/m) times the between-imputation
# variance.
pooled.est <- mean(est)
pooled.se <- sqrt(mean(var.est) + var(est) * (1 + 1 / n_imp))
pooled.or <- exp(pooled.est)
pooled.lb <- exp(pooled.est - 1.96 * pooled.se)
pooled.ub <- exp(pooled.est + 1.96 * pooled.se)

# Two-sided normal p-value. abs() is required: without it a negative pooled
# estimate yields a value greater than 1.
pooled.pval <- 2 * pnorm(-abs(pooled.est / pooled.se))

pooled <- data.frame(
  variable,
  level,
  pooled.est = format(round(pooled.est, 2), nsmall = 2, scientific = FALSE),
  pooled.se = format(round(pooled.se, 2), nsmall = 2, scientific = FALSE),
  pooled.or = format(round(pooled.or, 2), nsmall = 2, scientific = FALSE),
  pooled.ci = paste0(
    "(", format(round(pooled.lb, 2), nsmall = 2, scientific = FALSE),
    ", ", format(round(pooled.ub, 2), nsmall = 2, scientific = FALSE), ")"
  ),
  pooled.pval = format(round(pooled.pval, 4), nsmall = 4, scientific = FALSE)
)

pooled <- setnames(pooled,
                   old = c("variable", "level", "pooled.est", "pooled.se",
                           "pooled.or", "pooled.ci", "pooled.pval"),
                   new = c("Variable", "Level", "Estimate", "Standard Error",
                           "OR", "95% CI", "P-value"))

# Report the odds ratio, CI and p-value only; the log-odds estimate and its
# standard error are left out of the displayed table.
pooled[, !names(pooled) %in% c("Estimate", "Standard Error")] %>%
  kable(escape = FALSE, digits = 4) %>%
  trimws() %>%
  kable_styling(c("striped", "bordered")) %>%
  footnote(general_title = "Reference Levels:",
           general = "Risk Allele Category (Low-Risk)")
```


### CKD - defined by eGFR < 60 & uACR > 300 (Propensity only)
```{r ckd lr, include=F, warning=F}
# Analysis set for the combined CKD outcome: drop rows where either the
# risk-allele category or the outcome is missing.
df2 <- df[!(is.na(df$risk_alleles.f) | is.na(df$ckd_byboth.f)), ]

# Covariate distribution summaries, registered for rms via options(datadist).
dd <- datadist(df2)
options(datadist = "dd")

# Fixed seed so the imputations below are reproducible.
set.seed(20200407)

# Multiple imputation (20 imputations, 3 knots) of outcome, exposure and
# covariates.
a7 <- aregImpute(
  ~ ckd_byboth.f + risk_alleles.f + age + sex.f + ethnicity.f +
    duration_on_art + tenofovir.f + level_art_trt.f + cd4_sqrt + vl.f +
    dm.f + htn.f + hf.f + others.f + ace_arb.f + smoke.f + mean_bmi +
    jnc_bp.f,
  data = df2, nk = 3, n.impute = 20
)
```

```{r ckd results}
# Propensity-adjusted effect of risk-allele category on the combined CKD
# outcome (ckd_byboth.f), estimated within each completed (imputed) data set
# and then pooled across imputations.

# Return one completed copy of `data` in which missing values are filled in
# from imputation number `im` of the aregImpute object `impute`.
fill_data <- function(impute = a7, data = df2, im = 1) {
  cbind.data.frame(impute.transcan(x = impute,
                                   imputation = im,
                                   data = data,
                                   list.out = TRUE,
                                   pr = FALSE))
}

# Number of imputations, read from the imputation object so the loop and the
# pooled variance below always agree with how a7 was built.
n_imp <- a7$n.impute

# Per-imputation log-odds estimate and its squared standard error.
est <- var.est <- numeric(n_imp)

for (i in seq_len(n_imp)) {
  full_dat <- fill_data(im = i)

  # Propensity model: risk-allele category given the covariates.
  m <- lrm(risk_alleles.f ~ rcs(age, 3) + sex.f + ethnicity.f +
             rcs(duration_on_art, 3) + tenofovir.f + level_art_trt.f +
             rcs(cd4_sqrt, 3) + vl.f + dm.f + htn.f + hf.f + others.f +
             ace_arb.f + smoke.f + rcs(mean_bmi, 3) + jnc_bp.f,
           data = full_dat)

  # Linear predictor of the propensity model, used as the adjustment term.
  full_dat$pslp <- predict(m)

  # Outcome model adjusted for the propensity linear predictor.
  m_a <- glm(ckd_byboth.f ~ risk_alleles.f + pslp, data = full_dat,
             family = "binomial")

  # Row 2 of the coefficient table is the risk_alleles.f term
  # (row 1 is the intercept).
  coef_tab <- coef(summary(m_a))
  est[i] <- coef_tab[2, 1]
  var.est[i] <- coef_tab[2, 2]^2
}

variable <- "Risk Allele Category"
level <- "High-Risk"

# Pool across imputations: mean estimate; total variance is the mean
# within-imputation variance plus (1 + 1/m) times the between-imputation
# variance.
pooled.est <- mean(est)
pooled.se <- sqrt(mean(var.est) + var(est) * (1 + 1 / n_imp))
pooled.or <- exp(pooled.est)
pooled.lb <- exp(pooled.est - 1.96 * pooled.se)
pooled.ub <- exp(pooled.est + 1.96 * pooled.se)

# Two-sided normal p-value. abs() is required: without it a negative pooled
# estimate yields a value greater than 1.
pooled.pval <- 2 * pnorm(-abs(pooled.est / pooled.se))

pooled <- data.frame(
  variable,
  level,
  pooled.est = format(round(pooled.est, 2), nsmall = 2, scientific = FALSE),
  pooled.se = format(round(pooled.se, 2), nsmall = 2, scientific = FALSE),
  pooled.or = format(round(pooled.or, 2), nsmall = 2, scientific = FALSE),
  pooled.ci = paste0(
    "(", format(round(pooled.lb, 2), nsmall = 2, scientific = FALSE),
    ", ", format(round(pooled.ub, 2), nsmall = 2, scientific = FALSE), ")"
  ),
  pooled.pval = format(round(pooled.pval, 4), nsmall = 4, scientific = FALSE)
)

pooled <- setnames(pooled,
                   old = c("variable", "level", "pooled.est", "pooled.se",
                           "pooled.or", "pooled.ci", "pooled.pval"),
                   new = c("Variable", "Level", "Estimate", "Standard Error",
                           "OR", "95% CI", "P-value"))

# Report the odds ratio, CI and p-value only; the log-odds estimate and its
# standard error are left out of the displayed table.
pooled[, !names(pooled) %in% c("Estimate", "Standard Error")] %>%
  kable(escape = FALSE, digits = 4) %>%
  trimws() %>%
  kable_styling(c("striped", "bordered")) %>%
  footnote(general_title = "Reference Levels:",
           general = "Risk Allele Category (Low-Risk)")

```