---
title: "Basics of Simulation for Study Planning"
author: "Matthew S. Shotwell"
date: "5/8/2020"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library('dplyr')
library('magrittr')
library('knitr')
library('ggplot2')
```

## Simple example of power simulation

   - Estimate power of one sample study
   - Simulation step:
      - Draw a sample of size 'n'
      - Generate response using normal dist
   - Analysis step:
      - Apply one-sided test (H0: mean = 0)
      - Compare p-value with 0.05 threshold

```{r cache=TRUE}

## simulation step
sim_step <- function(n = 100, mean = 0, sd = 1, ...)
  data.frame(x = rnorm(n, mean, sd))

## analysis step
## dat - result of call to sim_step
ana_step <- function(dat, alp = 0.05, ...)
  t.test(dat$x)$p.value < alp

## compute power assuming n = 100, mean = 0.1, sd = 1
replicate(10000, ana_step(sim_step(100, mean=0.1, sd=1))) %>%
  mean

```


## Simple adaptive study example

   - One-arm study (like before)
   - One-sided test (H0: mean = 0)
   - Enroll up to 500
   - Interim analysis every 100
   - At each analysis, stop for efficacy if p-value < 0.05 
   - Consider following operating characteristics:
      - Type-I error rate
      - Power
      - Sample size distribution

```{r cache=TRUE}

## simulate simple adaptive study
sim_study <- function(nmax = 500, intr = 100, ...) {
  
  ## empty data
  dat <- data.frame()

  ## generate data in rounds of size 'intr'
  while(nrow(dat) < nmax) {
    
    ## generate next round of data
    dat %<>% rbind(sim_step(n = min(intr, nmax-nrow(dat)), ...))
    
    ## stop early for efficacy 
    if(ana_step(dat, ...))
      break
    
    ## stop early for futility
    #if(fut_test(dat, ...))
    #  break
    
  }

  ## return vector of info about study
  c(n = nrow(dat), reject = ana_step(dat, ...))
}

## Example output
replicate(10, sim_study())

## Sample size dist and power under null (type-I error) 
sim_result <- replicate(10000, sim_study(mean = 0))
## Type-I error
mean(sim_result['reject',])
## Sample size dist
table(sim_result['n',]) %>% prop.table


## Adjust 'alp' such that Type-I error is about 0.05
sim_result <- replicate(10000, sim_study(mean = 0, alp = 0.02))
## Type-I error
mean(sim_result['reject',])
## Sample size dist
table(sim_result['n',]) %>% prop.table


## Sample size dist and power under mean = 0.15
sim_result <- replicate(10000, sim_study(mean = 0.15, alp = 0.02))
## Power
mean(sim_result['reject',])
## Sample size dist
table(sim_result['n',]) %>% prop.table

```

## Dealing with uncertainty
```{r cache=TRUE}
## Specify 'low', 'mid', and 'high' values for unknown 
## parameters: 'mean' and 'sd'
## also include null value of mean so we can estimate type-I error
pars <- expand.grid(Mean = c(0.00, 0.10, 0.15, 0.20),
                      SD = c(0.8, 1.0, 1.2))

## redo simulations for all combinations of parameters
sims <- apply(pars, 1, function(p) {
  replicate(10000, sim_study(mean=p['Mean'], sd=p['SD'], alp=0.02),
            simplify = FALSE)
})

## compute power/type-I error and for each comb. of parameters
powr <- sapply(sims, function(x) 
  mean(do.call(rbind, x)[,'reject']))

## compute avg. sample size for each comb. of parameters
## each combination of parameters
ssiz <- sapply(sims, function(x) 
  mean(do.call(rbind, x)[,'n']))

## compute SD of sample size for each comb. of parameters
## each combination of parameters
ssid <- sapply(sims, function(x) 
  sd(do.call(rbind, x)[,'n']))

pars %<>% 
  mutate(`Power/Type-I Err` = powr,
         `Avg. Sample Size` = ssiz,
         `SD Sample Size` = ssid)

kable(pars)
```

```{r}
pars %<>% 
  mutate(SD = factor(SD, 
    levels=c(0.8, 1.0, 1.2),
    labels=c('0.8','1.0','1.2'))) %>%
  mutate(avg_lo = `Avg. Sample Size` - `SD Sample Size`) %>%
  mutate(avg_hi = `Avg. Sample Size` + `SD Sample Size`)

pars %>% 
  ggplot(aes(x=Mean, y=`Power/Type-I Err`,
             group=SD, color=SD)) +
  geom_line() + 
  geom_point() + 
  geom_hline(yintercept=0.05, linetype='dashed')

pars %>% 
  ggplot(aes(x=Mean, y=`Avg. Sample Size`,
             group=SD, color=SD)) +
  geom_point(position=position_dodge(width=0.01)) +
  geom_line(position=position_dodge(width=0.01)) + 
  geom_linerange(aes(ymin=avg_lo, ymax=avg_hi),
                 position=position_dodge(width=0.01))

```