1. Using only the runif function in R, generate 10,000 iid random variables from
  1. \(X \sim \text{Exponential}(5).\)
  2. \(Y \sim \text{Gamma}(3,1.5).\) Hint: Get in terms of exponential distribution.
  3. \(Z \sim \text{Chi Squared with 6 degrees of freedom}\) Hint: Get in terms of exponential distribution.
  1. Find is \(P(X>Y)?\)

  2. Generate 10,000 draws of \(X_{(k)}\) when \(X_i\) is iid and \(X_i \sim \text{Uniform}(0,1)\). Plot the empirical density for \(X_{(k)}\) (histogram) and show that it is \(\text{Beta}(k,n+1-k)\) when
  1. \(k = 1\) and \(n = 100\)
  2. \(k = 50\) and \(n = 100\)
  3. \(k = 100\) and \(n = 100\)

Question 1a

n = 10000
u = runif(n,0,1)

## part a
x = -5*(log(1-u))
mean(x)
## [1] 4.936507
var(x)
## [1] 23.98348

Check Question 1a

x_check = rexp(n, 1/5)
mean(x_check)
## [1] 4.980291
var(x_check)
## [1] 25.12368
# Overlaid histograms with means
dat = data.frame(c(x,x_check),c(rep("uniform",n),rep("direct",n)))
colnames(dat) = c("X","simulation_method")
ggplot(dat, aes(x=X, fill=simulation_method)) +
    geom_histogram(binwidth=.5, alpha=.5, position="identity")

Question 1b

y = rep(-1,n)
for (i in 1:n) {
  u = runif(3,0,1)
  x_temp = -1.5*log(1-u)
  y[i] = sum(x_temp)
}
mean(y)
## [1] 4.50226
var(y)
## [1] 6.894584

Check Question 1b

y_check = rgamma(n,3, scale = 1.5)
mean(y_check)
## [1] 4.476465
var(y_check)
## [1] 6.417549
# Overlaid histograms with means
dat = data.frame(c(y,y_check),c(rep("uniform",n),rep("direct",n)))
colnames(dat) = c("Y","simulation_method")
ggplot(dat, aes(x=Y, fill=simulation_method)) +
    geom_histogram(binwidth=.5, alpha=.5, position="identity")

Question 1c

z = rep(-1,n)
for (i in 1:n) {
  u = runif(3,0,1)
  x_temp2 = -2*log(1-u)
  z[i]<-sum(x_temp2)
}
mean(z)
## [1] 5.978351
var(z)
## [1] 11.90083

Check Question 1c

z_check = rgamma(n,3, scale = 2)
mean(z_check)
## [1] 5.968321
var(z_check)
## [1] 12.291
# Overlaid histograms with means
dat = data.frame(c(z,z_check),c(rep("uniform",n),rep("direct",n)))
colnames(dat) = c("Z","simulation_method")
ggplot(dat, aes(x=Z, fill=simulation_method)) +
    geom_histogram(binwidth=.5, alpha=.5, position="identity")

Question 2

ind = x>y
sum(ind)/n
## [1] 0.457

Question 3a

###  Distribution of order statistics
n_sample = 100
k = 1
xk = rep(-1,n)
for (i in 1:n) {
  temp = runif(n_sample,0,1)
  xk[i]<-temp[order(temp)][k]
}

## check with histogram
hist(xk,freq=FALSE, main = "Density of k = 1",xlab = "X_(k)")
t<-c(1:1000)/1000
lines(t,dbeta(t,k,n_sample+1-k), col = 2)

## check mean and variance
mean(xk)
## [1] 0.01001045
k/(n_sample + 1)
## [1] 0.00990099
var(xk)
## [1] 9.811699e-05
k*(n_sample+1-k)/((n_sample+1)^2*(n_sample+2))
## [1] 9.610746e-05

Question 3b

###  Distribution of order statistics
n_sample = 100
k = 50
xk = rep(-1,n)
for (i in 1:n) {
  temp = runif(n_sample,0,1)
  xk[i]<-temp[order(temp)][k]
}

## check with histogram
hist(xk,freq=FALSE, main = "Density of k = 50",xlab = "X_(k)")
t<-c(1:1000)/1000
lines(t,dbeta(t,k,n_sample+1-k), col = 2)

## check mean and variance
mean(xk)
## [1] 0.4952559
k/(n_sample + 1)
## [1] 0.4950495
var(xk)
## [1] 0.002363937
k*(n_sample+1-k)/((n_sample+1)^2*(n_sample+2))
## [1] 0.00245074

Question 3c

###  Distribution of order statistics
n_sample = 100
k = 100
xk = rep(-1,n)
for (i in 1:n) {
  temp = runif(n_sample,0,1)
  xk[i]<-temp[order(temp)][k]
}

## check with histogram
hist(xk,freq=FALSE, main = "Density of k = 100",xlab = "X_(k)")
t<-c(1:1000)/1000
lines(t,dbeta(t,k,n_sample+1-k), col = 2)

## check mean and variance
mean(xk)
## [1] 0.9901069
k/(n_sample + 1)
## [1] 0.990099
var(xk)
## [1] 0.0001002259
k*(n_sample+1-k)/((n_sample+1)^2*(n_sample+2))
## [1] 9.610746e-05