--- title: "Basics of Simulation for Study Planning" author: "Matthew S. Shotwell" date: "5/8/2020" output: html_document --- ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) library('dplyr') library('magrittr') library('knitr') library('ggplot2') ``` ## Simple example of power simulation - Estimate power of one sample study - Simulation step: - Draw a sample of size 'n' - Generate response using normal dist - Analysis step: - Apply one-sided test (H0: mean = 0) - Compare p-value with 0.05 threshold ```{r cache=TRUE} ## simulation step sim_step <- function(n = 100, mean = 0, sd = 1, ...) data.frame(x = rnorm(n, mean, sd)) ## analysis step ## dat - result of call to sim_step ana_step <- function(dat, alp = 0.05, ...) t.test(dat$x)$p.value < alp ## compute power assuming n = 100, mean = 0.1, sd = 1 replicate(10000, ana_step(sim_step(100, mean=0.1, sd=1))) %>% mean ``` ## Simple adaptive study example - One-arm study (like before) - One-sided test (H0: mean = 0) - Enroll up to 500 - Interim analysis every 100 - At each analysis, stop for efficacy if p-value < 0.05 - Consider following operating characteristics: - Type-I error rate - Power - Sample size distribution ```{r cache=TRUE} ## simulate simple adaptive study sim_study <- function(nmax = 500, intr = 100, ...) { ## empty data dat <- data.frame() ## generate data in rounds of size 'intr' while(nrow(dat) < nmax) { ## generate next round of data dat %<>% rbind(sim_step(n = min(intr, nmax-nrow(dat)), ...)) ## stop early for efficacy if(ana_step(dat, ...)) break ## stop early for futility #if(fut_test(dat, ...)) # break } ## return vector of info about study c(n = nrow(dat), reject = ana_step(dat, ...)) } ## Example output replicate(10, sim_study()) ## Sample size dist and power under null (type-I error) sim_result <- replicate(10000, sim_study(mean = 0)) ## Type-I error mean(sim_result['reject',]) ## Sample size dist table(sim_result['n',]) %>% prop.table ## Adjust 'alp' such that Type-I error is about 0.05 sim_result <- replicate(10000, sim_study(mean = 0, alp = 0.02)) ## Type-I error mean(sim_result['reject',]) ## Sample size dist table(sim_result['n',]) %>% prop.table ## Sample size dist and power under mean = 0.15 sim_result <- replicate(10000, sim_study(mean = 0.15, alp = 0.02)) ## Power mean(sim_result['reject',]) ## Sample size dist table(sim_result['n',]) %>% prop.table ``` ## Dealing with uncertainty ```{r cache=TRUE} ## Specify 'low', 'mid', and 'high' values for unknown ## parameters: 'mean' and 'sd' ## also include null value of mean so we can estimate type-I error pars <- expand.grid(Mean = c(0.00, 0.10, 0.15, 0.20), SD = c(0.8, 1.0, 1.2)) ## redo simulations for all combinations of parameters sims <- apply(pars, 1, function(p) { replicate(10000, sim_study(mean=p['Mean'], sd=p['SD'], alp=0.02), simplify = FALSE) }) ## compute power/type-I error and for each comb. of parameters powr <- sapply(sims, function(x) mean(do.call(rbind, x)[,'reject'])) ## compute avg. sample size for each comb. of parameters ## each combination of parameters ssiz <- sapply(sims, function(x) mean(do.call(rbind, x)[,'n'])) ## compute SD of sample size for each comb. of parameters ## each combination of parameters ssid <- sapply(sims, function(x) sd(do.call(rbind, x)[,'n'])) pars %<>% mutate(`Power/Type-I Err` = powr, `Avg. Sample Size` = ssiz, `SD Sample Size` = ssid) kable(pars) ``` ```{r} pars %<>% mutate(SD = factor(SD, levels=c(0.8, 1.0, 1.2), labels=c('0.8','1.0','1.2'))) %>% mutate(avg_lo = `Avg. Sample Size` - `SD Sample Size`) %>% mutate(avg_hi = `Avg. Sample Size` + `SD Sample Size`) pars %>% ggplot(aes(x=Mean, y=`Power/Type-I Err`, group=SD, color=SD)) + geom_line() + geom_point() + geom_hline(yintercept=0.05, linetype='dashed') pars %>% ggplot(aes(x=Mean, y=`Avg. Sample Size`, group=SD, color=SD)) + geom_point(position=position_dodge(width=0.01)) + geom_line(position=position_dodge(width=0.01)) + geom_linerange(aes(ymin=avg_lo, ymax=avg_hi), position=position_dodge(width=0.01)) ```