You are here: Vanderbilt Biostatistics Wiki>Main Web>PtsdExample (revision 1)EditAttach

Why you can't trust randomization: the importance of restricted randomization in RCTs.

Robert Greevy, PhD

Please Download the Following

The dataset of examiners that need to be randomly assigned to treatment and control.

The R code which you'll be using today (it's also in red below).

If you want a copy of the handout,

R code for reference and cutting and pasting

# Cut and paste sections of code into R to run them

####### Get Started #######

# Load in the dataset
  # edit your file path for your computer
d <- read.csv("C:/Users/Robert/Desktop/PtsdEx01.csv")
names(d)

# Summarize the dataset
dim(d) # specify the number of rows and columns
sum(d$Site1) # how many examiners in site 1
sum(d$Site2)
sum(d$Site3)
sum(d$Site4)
sum(d$Site5)
sum(d$Site6)
sum(d$Site7)
sum(d$Female)
sum(d$TbiEx)
summary(d$Age)
summary(d$Experience)



####### Randomize using a simple coin toss approach #######

# the code rbinom(38,1,0.5) tosses a 0/1 coin 38 times with probability of 1 = 0.5
Treatment <- rbinom(38,1,0.5)   # toss the coin
d$Treatment <- Treatment    # add the treatment to your dataset



# How did your randomization turn out?
# How many examiners are in your treated group vs the control group?
sum(d$Treatment)  # number in treated group
sum((1-d$Treatment))  # number in control group

# How many of the Site1 examiners are in the treated group vs the control group?
sum(d$Treatment*d$Site1)
sum((1-d$Treatment)*d$Site1)

# How about Site2
sum(d$Treatment*d$Site2)
sum((1-d$Treatment)*d$Site2)

# Site3
sum(d$Treatment*d$Site3)
sum((1-d$Treatment)*d$Site3)
# Site4
sum(d$Treatment*d$Site4)
sum((1-d$Treatment)*d$Site4)
# Site5
sum(d$Treatment*d$Site5)
sum((1-d$Treatment)*d$Site5)
# Site6
sum(d$Treatment*d$Site6)
sum((1-d$Treatment)*d$Site6)
# Site7
sum(d$Treatment*d$Site7)
sum((1-d$Treatment)*d$Site7)

# Female
sum(d$Treatment*d$Female)
sum((1-d$Treatment)*d$Female)
# TBI
sum(d$Treatment*d$TbiEx)
sum((1-d$Treatment)*d$TbiEx)


# How about Age
mean(d$Age*d$Treatment)
mean(d$Age*(1-d$Treatment))
mean(d$Age*d$Treatment) - mean(d$Age*(1-d$Treatment))

# Experience
mean(d$Experience*d$Treatment)
mean(d$Experience*(1-d$Treatment))
mean(d$Experience*d$Treatment) - mean(d$Experience*(1-d$Treatment))




####### Randomize using drawing balls from a bowl approach (restricting to 19 in each group) #######
balls <- c(rep(0,19),rep(1,19)) # put 19 0-balls and 19 1-balls in the bowl
Treatment <- sample(balls,38,replace=F) # mix up the balls
d$Treatment <- Treatment    # add the new treatment to your dataset

# now see how you did
# Examiners in each group (treatment, control)
c(sum(d$Treatment), sum((1-d$Treatment)))
# Site1 (treatment, control)
c(sum(d$Treatment*d$Site1), sum((1-d$Treatment)*d$Site1))
# Site2
c(sum(d$Treatment*d$Site2), sum((1-d$Treatment)*d$Site2))
# Site3
c(sum(d$Treatment*d$Site3), sum((1-d$Treatment)*d$Site3))
# Site4
c(sum(d$Treatment*d$Site4), sum((1-d$Treatment)*d$Site4))
# Site5
c(sum(d$Treatment*d$Site5), sum((1-d$Treatment)*d$Site5))
# Site6
c(sum(d$Treatment*d$Site6), sum((1-d$Treatment)*d$Site6))
# Site7
c(sum(d$Treatment*d$Site7), sum((1-d$Treatment)*d$Site7))
# Female
c(sum(d$Treatment*d$Female), sum((1-d$Treatment)*d$Female))
# TBI
c(sum(d$Treatment*d$TbiEx), sum((1-d$Treatment)*d$TbiEx))
# Age (mean treatment, mean control, difference)
round(c(mean(d$Age*d$Treatment), mean(d$Age*(1-d$Treatment)), mean(d$Age*d$Treatment) - mean(d$Age*(1-d$Treatment))),1)
# Experience
round(c(mean(d$Experience*d$Treatment), mean(d$Experience*(1-d$Treatment)), mean(d$Experience*d$Treatment) - mean(d$Experience*(1-d$Treatment))),1)



####### Randomize using drawing balls from a bowl approach (restricting to 19 in each group) #######
# this is going to take some work
balls <- c(rep(0,3),rep(1,3)) # put 3 balls of each type for site1 in the bowl
Treatment <- sample(balls,6,replace=F) # sample treatment for site1
balls <- c(rep(0,2),rep(1,2),rbinom(1,1,0.5)) # put in either 2 and 3 treatment/controls balls or 3 and 2 with prob 1/2 each.
Treatment <- c(Treatment,sample(balls,5,replace=F)) # add in sample treatment for site2
balls <- c(rep(0,2),rep(1,2)) # site3
Treatment <- c(Treatment,sample(balls,4,replace=F)) # site3
balls <- c(rep(0,2),rep(1,2),rbinom(1,1,0.5)) # site4
Treatment <- c(Treatment,sample(balls,5,replace=F)) # site4
balls <- c(rep(0,3),rep(1,3)) # site5
Treatment <- c(Treatment,sample(balls,6,replace=F)) # site5
balls <- c(rep(0,2),rep(1,2),rbinom(1,1,0.5)) # site6
Treatment <- c(Treatment,sample(balls,5,replace=F)) # site6
balls <- c(rep(0,3),rep(1,3),rbinom(1,1,0.5)) # site7
Treatment <- c(Treatment,sample(balls,7,replace=F)) # site7
d$Treatment <- Treatment

# now see how you did
# Examiners in each group (treatment, control)
c(sum(d$Treatment), sum((1-d$Treatment)))
# Site1 (treatment, control)
c(sum(d$Treatment*d$Site1), sum((1-d$Treatment)*d$Site1))
# Site2
c(sum(d$Treatment*d$Site2), sum((1-d$Treatment)*d$Site2))
# Site3
c(sum(d$Treatment*d$Site3), sum((1-d$Treatment)*d$Site3))
# Site4
c(sum(d$Treatment*d$Site4), sum((1-d$Treatment)*d$Site4))
# Site5
c(sum(d$Treatment*d$Site5), sum((1-d$Treatment)*d$Site5))
# Site6
c(sum(d$Treatment*d$Site6), sum((1-d$Treatment)*d$Site6))
# Site7
c(sum(d$Treatment*d$Site7), sum((1-d$Treatment)*d$Site7))
# Female
c(sum(d$Treatment*d$Female), sum((1-d$Treatment)*d$Female))
# TBI
c(sum(d$Treatment*d$TbiEx), sum((1-d$Treatment)*d$TbiEx))
# Age (mean treatment, mean control, difference)
round(c(mean(d$Age*d$Treatment), mean(d$Age*(1-d$Treatment)), mean(d$Age*d$Treatment) - mean(d$Age*(1-d$Treatment))),1)
# Experience
round(c(mean(d$Experience*d$Treatment), mean(d$Experience*(1-d$Treatment)), mean(d$Experience*d$Treatment) - mean(d$Experience*(1-d$Treatment))),1)



####### Randomize using Mahalanobis Distance Matching #######
# reference 1: Greevy R, Lu B, Silber JH, Rosenbaum P.  Optimal multivariate matching before randomization.  Biostatistics. 2004 Apr;5(2):263-75.
# some behinds the scenes work matched the examiners in pairs using the default M-distance weighting
Treatment <- rep(0,38) # set treatment to all 0s
temp <- sample(c(1,5),1); Treatment[temp] <- 1; # randomly pick one of the pair to be treated
temp <- sample(c(2,4),1); Treatment[temp] <- 1;
temp <- sample(c(3,19),1); Treatment[temp] <- 1;
temp <- sample(c(6,31),1); Treatment[temp] <- 1;
temp <- sample(c(7,9),1); Treatment[temp] <- 1;
temp <- sample(c(8,10),1); Treatment[temp] <- 1;
temp <- sample(c(11,38),1); Treatment[temp] <- 1;
temp <- sample(c(12,15),1); Treatment[temp] <- 1;
temp <- sample(c(13,14),1); Treatment[temp] <- 1;
temp <- sample(c(16,17),1); Treatment[temp] <- 1;
temp <- sample(c(18,20),1); Treatment[temp] <- 1;
temp <- sample(c(21,22),1); Treatment[temp] <- 1;
temp <- sample(c(23,25),1); Treatment[temp] <- 1;
temp <- sample(c(24,26),1); Treatment[temp] <- 1;
temp <- sample(c(27,28),1); Treatment[temp] <- 1;
temp <- sample(c(29,30),1); Treatment[temp] <- 1;
temp <- sample(c(32,33),1); Treatment[temp] <- 1;
temp <- sample(c(34,36),1); Treatment[temp] <- 1;
temp <- sample(c(35,37),1); Treatment[temp] <- 1;
d$Treatment <- Treatment


# now see how you did (female has the potential for imbalance)
# Examiners in each group (treatment, control)
c(sum(d$Treatment), sum((1-d$Treatment)))
# Site1 (treatment, control)
c(sum(d$Treatment*d$Site1), sum((1-d$Treatment)*d$Site1))
# Site2
c(sum(d$Treatment*d$Site2), sum((1-d$Treatment)*d$Site2))
# Site3
c(sum(d$Treatment*d$Site3), sum((1-d$Treatment)*d$Site3))
# Site4
c(sum(d$Treatment*d$Site4), sum((1-d$Treatment)*d$Site4))
# Site5
c(sum(d$Treatment*d$Site5), sum((1-d$Treatment)*d$Site5))
# Site6
c(sum(d$Treatment*d$Site6), sum((1-d$Treatment)*d$Site6))
# Site7
c(sum(d$Treatment*d$Site7), sum((1-d$Treatment)*d$Site7))
# Female
c(sum(d$Treatment*d$Female), sum((1-d$Treatment)*d$Female))
# TBI
c(sum(d$Treatment*d$TbiEx), sum((1-d$Treatment)*d$TbiEx))
# Age (mean treatment, mean control, difference)
round(c(mean(d$Age*d$Treatment), mean(d$Age*(1-d$Treatment)), mean(d$Age*d$Treatment) - mean(d$Age*(1-d$Treatment))),1)
# Experience
round(c(mean(d$Experience*d$Treatment), mean(d$Experience*(1-d$Treatment)), mean(d$Experience*d$Treatment) - mean(d$Experience*(1-d$Treatment))),1)




####### Randomize using Weighted Mahalanobis Distance Matching #######
# Female given weight = 2
# see webapp designed by Rob Greevy, Bo Lu, and Cole Beck
Treatment <- rep(0,38)
temp <- sample(c(1,5),1); Treatment[temp] <- 1;
temp <- sample(c(2,4),1); Treatment[temp] <- 1;
temp <- sample(c(3,19),1); Treatment[temp] <- 1;
temp <- sample(c(6,9),1); Treatment[temp] <- 1;
temp <- sample(c(7,22),1); Treatment[temp] <- 1;
temp <- sample(c(8,10),1); Treatment[temp] <- 1;
temp <- sample(c(11,38),1); Treatment[temp] <- 1;
temp <- sample(c(12,15),1); Treatment[temp] <- 1;
temp <- sample(c(13,14),1); Treatment[temp] <- 1;
temp <- sample(c(16,17),1); Treatment[temp] <- 1;
temp <- sample(c(18,20),1); Treatment[temp] <- 1;
temp <- sample(c(21,31),1); Treatment[temp] <- 1;
temp <- sample(c(23,25),1); Treatment[temp] <- 1;
temp <- sample(c(24,26),1); Treatment[temp] <- 1;
temp <- sample(c(27,28),1); Treatment[temp] <- 1;
temp <- sample(c(29,30),1); Treatment[temp] <- 1;
temp <- sample(c(32,33),1); Treatment[temp] <- 1;
temp <- sample(c(34,36),1); Treatment[temp] <- 1;
temp <- sample(c(35,37),1); Treatment[temp] <- 1;
d$Treatment <- Treatment


# now see how you did (site2 has the potential for imbalance)
# Examiners in each group (treatment, control)
c(sum(d$Treatment), sum((1-d$Treatment)))
# Site1 (treatment, control)
c(sum(d$Treatment*d$Site1), sum((1-d$Treatment)*d$Site1))
# Site2
c(sum(d$Treatment*d$Site2), sum((1-d$Treatment)*d$Site2))
# Site3
c(sum(d$Treatment*d$Site3), sum((1-d$Treatment)*d$Site3))
# Site4
c(sum(d$Treatment*d$Site4), sum((1-d$Treatment)*d$Site4))
# Site5
c(sum(d$Treatment*d$Site5), sum((1-d$Treatment)*d$Site5))
# Site6
c(sum(d$Treatment*d$Site6), sum((1-d$Treatment)*d$Site6))
# Site7
c(sum(d$Treatment*d$Site7), sum((1-d$Treatment)*d$Site7))
# Female
c(sum(d$Treatment*d$Female), sum((1-d$Treatment)*d$Female))
# TBI
c(sum(d$Treatment*d$TbiEx), sum((1-d$Treatment)*d$TbiEx))
# Age (mean treatment, mean control, difference)
round(c(mean(d$Age*d$Treatment), mean(d$Age*(1-d$Treatment)), mean(d$Age*d$Treatment) - mean(d$Age*(1-d$Treatment))),1)
# Experience
round(c(mean(d$Experience*d$Treatment), mean(d$Experience*(1-d$Treatment)), mean(d$Experience*d$Treatment) - mean(d$Experience*(1-d$Treatment))),1)


Reference link: http://data.vanderbilt.edu/rapache/morematch
Edit | Attach | Print version | History: r4 < r3 < r2 < r1 | Backlinks | View wiki text | Edit WikiText | More topic actions...
Topic revision: r1 - 24 Sep 2008, RobertGreevy
 

This site is powered by FoswikiCopyright © 2013-2022 by the contributing authors. All material on this collaboration platform is the property of the contributing authors.
Ideas, requests, problems regarding Vanderbilt Biostatistics Wiki? Send feedback