Biostatistical Computing, PHC 6068

Simulation studies

Zhiguang Huo (Caleb)

Monday October 30, 2017

Outlines

What is a simulation study?

Simulation:

Rationale for simulation study

Common questions simulation study can answer

Monte Carlo simulation approximation

How to approximate (Draw a picture to illustrate)

Typical Monte Carlo simulation involves the following procedure:

  1. Generate \(B\) independent datasets \((1, 2, \ldots, B)\) under conditions of interest.
  2. Compute the numerical value of an estimator/test statistic \(T\) from each dataset. Then we have \(T_1, T_2, \ldots, T_b, \ldots, T_B\).
  3. If \(B\) is large enough, summary statistics across \(T_1, \ldots, T_B\) should be good approximations to the true properties of the estimator/test statistic.

Example:

Consider an estimator for a mean parameter \(\theta\):

We will have more concrete examples later.

Example 1, Compare estimator (normal distribution)

Note: 20% trimmed mean indicates average value after deleting first 10% quantile and last 10% quantile.

E.g. 3,2,4,7,1,6,5,8,9,10

Evaluation criteria

k: mean, median, mean20%

\(T^{mean}\)

\[T^{mean} = \frac{1}{n}\sum_{i=1}^n Y_i\]

Can be calculated analytically

\(T^{median}\)

\[T^{median} = Median_{i=1}^n Y_i\]

Variance is hard to be calculated analytically

\(T^{mean20\%}\)

Can be calculated

\[T^{mean20\%} = \frac{1}{0.8n}\sum_{i=0.1n+1}^{0.9n} Y_i\]

Variance is hard to be calculated analytically

Evaluation via simulation

Since sometimes it is hard to evaluate the estimator analytically, we can evaluate use simulation studies.

Simulation experiment

mu <- 1; B <- 1000; n <- 20

T1 <- numeric(B); T2 <- numeric(B); T3 <- numeric(B)

for(b in 1:B){
  set.seed(b)
  x <- rnorm(n, mean = mu)
  
  T1[b] <- mean(x)
  T2[b] <- median(x)
  T3[b] <- mean(x, trim = 0.1)
}

bias_T1 <- abs(mean(T1) - mu); bias_T2 <- abs(mean(T2) - mu); bias_T3 <- abs(mean(T3) - mu)
bias <- c(bias_T1, bias_T2, bias_T3)

var_T1 <- var(T1); var_T2 <- var(T2); var_T3 <- var(T3)
vars <- c(var_T1, var_T2, var_T3)

MSE1 <- bias_T1^2 + var_T1; MSE2 <- bias_T2^2 + var_T2; MSE3 <- bias_T3^2 + var_T3
MSE <- c(MSE1, MSE2, MSE3)

RE <- MSE1/MSE

comparisonTable <- rbind(bias, vars, MSE, RE)
colnames(comparisonTable) <- c("mean", "median", "mean 20% trim")
comparisonTable
##              mean      median mean 20% trim
## bias 3.832736e-05 0.006673072   0.000688909
## vars 5.174830e-02 0.074680145   0.054550469
## MSE  5.174830e-02 0.074724675   0.054550944
## RE   1.000000e+00 0.692519633   0.948623449

Example 2, Compare estimator (exponential distribution)

Simulation experiment

mu <- 1; B <- 1000; n <- 20

T1 <- numeric(B); T2 <- numeric(B); T3 <- numeric(B)

for(b in 1:B){
  set.seed(b)
  x <- rexp(n, rate = mu)
  
  T1[b] <- mean(x)
  T2[b] <- median(x)
  T3[b] <- mean(x, trim = 0.1)
}

bias_T1 <- abs(mean(T1) - mu); bias_T2 <- abs(mean(T2) - mu); bias_T3 <- abs(mean(T3) - mu)
bias <- c(bias_T1, bias_T2, bias_T3)

var_T1 <- var(T1); var_T2 <- var(T2); var_T3 <- var(T3)
vars <- c(var_T1, var_T2, var_T3)

MSE1 <- bias_T1^2 + var_T1; MSE2 <- bias_T2^2 + var_T2; MSE3 <- bias_T3^2 + var_T3
MSE <- c(MSE1, MSE2, MSE3)

comparisonTable <- rbind(bias, vars, MSE)
colnames(comparisonTable) <- c("mean", "median", "mean 20% trim")
comparisonTable
##            mean     median mean 20% trim
## bias 0.01040018 0.26539760    0.13243138
## vars 0.05021121 0.05052217    0.04247023
## MSE  0.05031938 0.12095806    0.06000830

Example 3, Compare estimator (Cauchy distribution)

Simulation experiment

mu <- 0; B <- 1000; n <- 20

T1 <- numeric(B); T2 <- numeric(B); T3 <- numeric(B)

for(b in 1:B){
  set.seed(b)
  x <- rcauchy(n, location  = mu)
  
  T1[b] <- mean(x)
  T2[b] <- median(x)
  T3[b] <- mean(x, trim = 0.1)
}

bias_T1 <- abs(mean(T1) - mu); bias_T2 <- abs(mean(T2) - mu); bias_T3 <- abs(mean(T3) - mu)
bias <- c(bias_T1, bias_T2, bias_T3)

var_T1 <- var(T1); var_T2 <- var(T2); var_T3 <- var(T3)
vars <- c(var_T1, var_T2, var_T3)

MSE1 <- bias_T1^2 + var_T1; MSE2 <- bias_T2^2 + var_T2; MSE3 <- bias_T3^2 + var_T3
MSE <- c(MSE1, MSE2, MSE3)

comparisonTable <- rbind(bias, vars, MSE)
colnames(comparisonTable) <- c("mean", "median", "mean 20% trim")
comparisonTable
##             mean      median mean 20% trim
## bias   0.1187952 0.005811866    0.00296929
## vars 742.5037597 0.146730729    0.37387966
## MSE  742.5178721 0.146764506    0.37388847

UMVUE

\[\hat{\beta}^{ols} = \arg \min_\beta \|y - X\beta\|^2_2\]

\[\hat{\beta}^{lasso} = \arg \min_\beta \|y - X\beta\|^2_2 + \lambda \|\beta\|_1\]

Example 4, evaluate bias, variance and MSE of OLS, lasso

Model setup:

Goal:

For simulation of OLS (ordinary least square)

For simulation of lasso

Simulation experiment

library(lars)
## Loaded lars 1.2
beta0 <- 1; beta1 <- 3; beta2 <- 5
beta3 <- beta4 <- beta5 <- 0
beta <- c(beta0,beta1,beta2,beta3,beta4,beta5)
p <- 5; n <- 30
set.seed(32611)
X0 <- matrix(rnorm(p*n),nrow=n)
X <- cbind(1, X0) ## X fixed

Y0 <- X %*% as.matrix(beta, ncol=1)

B <- 1000
beta_ols <- replicate(n = B, expr = list())
beta_lasso <- replicate(n = B, expr = list())

for(b in 1:B){
  set.seed(b)

  error <- rnorm(length(Y0), sd = 3)
  Y <- Y0 + error
  
  abeta_ols <- lm(Y~X0)$coefficients
  beta_ols[[b]] <- abeta_ols
  
  alars <- lars(x = X, y = Y, intercept=F)
  beta_lasso[[b]] <- alars
}

Simulation experiment (continue 2)

beta_ols_hat <- Reduce("+",beta_ols)/length(beta_ols)
beta_ols_bias <- sqrt(sum((beta_ols_hat - beta)^2))
beta_ols_variance <-  sum(Reduce("+", lapply(beta_ols,function(x) (x - beta_ols_hat)^2))/(length(beta_ols) - 1))
beta_ols_MSE <- beta_ols_bias + beta_ols_variance

as <- seq(0,10,0.1)
lassoCoef <- lapply(beta_lasso, function(alars){
  alassoCoef <- t(coef(alars, s=as, mode="lambda"))
  alassoCoef
} )

beta_lasso_hat <- Reduce("+",lassoCoef)/length(lassoCoef)
beta_lasso_bias <- sqrt(colSums((beta_lasso_hat - beta)^2))
beta_lasso_variance <-  colSums(Reduce("+", lapply(lassoCoef,function(x) (x - beta_lasso_hat)^2))/(length(beta_ols) - 1))
beta_lasso_MSE <- beta_lasso_bias + beta_lasso_variance

Simulation experiment (continue 3)

par(mfrow=c(2,2))
plot(x = as, y = beta_lasso_bias, type = "l", xlab="lambda", main="bias")
abline(h = beta_ols_bias, lty=2)
legend("topleft", legend = c("lasso", "OLS"), col = 1, lty = c(1,2))

plot(x = as, y = beta_lasso_variance, col=4, type = "l", xlab="lambda", main="variance")
abline(h = beta_ols_variance, lty=2, col=4)
legend("bottomleft", legend = c("lasso", "OLS"), col = 4, lty = c(1,2))

plot(x = as, y = beta_lasso_MSE, col=2, type = "l", xlab="lambda", main="MSE")
abline(h = beta_ols_MSE, lty=2, col=2)
legend("topleft", legend = c("lasso", "OLS"), col = 2, lty = c(1,2))


plot(x = as, y = beta_lasso_MSE, ylim = c(min(beta_lasso_bias), max(beta_lasso_MSE)), 
     type="n", xlab="lambda", main="MSE = bias^2 + variance")
lines(x = as, y = beta_lasso_MSE, type = "l", col=2)
lines(x = as, y = beta_lasso_variance, col=4)
lines(x = as, y = beta_lasso_bias, col=1)
legend("bottomright", legend = c("bias", "variance", "MSE"), col = c(1,4,2), lty = c(1))

Hypothesis testing (one-sided)

Hypothesis testing (two-sided)

Does a hypothesis testing procedure attain the nominal (advertised) level of significance or size?

Consider the following tests:

Simulation for sizes of hypothesis tests

Consider using the following simulation setting

Example 5, t.test() size

alpha <- 0.05; mu=0; n <- 20; B <- 1000

counts <- 0
for(b in 1:B){
  set.seed(b)
  
  x <- rnorm(n, mean=mu)
  atest <- t.test(x)
  
  if(atest$p.value < alpha){
    counts <- counts + 1
  }
}

counts/B
## [1] 0.05

Example 6, wilcox.test() size

alpha <- 0.05; mu=0; n <- 20; B <- 1000

counts <- 0
for(b in 1:B){
  set.seed(b)
  
  x <- rnorm(n, mean=mu)
  atest <- wilcox.test(x)
  
  if(atest$p.value < alpha){
    counts <- counts + 1
  }
}

counts/B
## [1] 0.047

Example 7, t.test() only use first 10 samples size

alpha <- 0.05; mu=0; n <- 20; B <- 1000

counts <- 0
for(b in 1:B){
  set.seed(b)
  
  x <- rnorm(n, mean=mu)[1:10]
  atest <- t.test(x)
  
  if(atest$p.value < alpha){
    counts <- counts + 1
  }
}

counts/B
## [1] 0.056

Compare these test procedures

Example 8, t.test() power

alpha <- 0.05; mu=0; mu1 <- 1; n <- 20; B <- 1000

counts <- 0
for(b in 1:B){
  set.seed(b)
  
  x <- rnorm(n, mean=mu1)
  atest <- t.test(x)
  
  if(atest$p.value < alpha){
    counts <- counts + 1
  }
}
counts / B
## [1] 0.985

Example 8, Validate the result using R function

library(pwr)
alpha <- 0.05; mu=0; mu1 <- 1; n <- 20; B <- 1000
pwr.t.test(n = n, d = mu1, sig.level = alpha, type = "one.sample", alternative = "two.sided")
## 
##      One-sample t test power calculation 
## 
##               n = 20
##               d = 1
##       sig.level = 0.05
##           power = 0.9885913
##     alternative = two.sided

Example 9, wilcox.test() power

alpha <- 0.05; mu=0; mu1 <- 1; n <- 20; B <- 1000

counts <- 0
for(b in 1:B){
  set.seed(b)
  
  x <- rnorm(n, mean=mu1)
  atest <- wilcox.test(x)
  
  if(atest$p.value < alpha){
    counts <- counts + 1
  }
}
counts / B
## [1] 0.979

Example 10, t.test() only use first 10 samples power

alpha <- 0.05; mu=0; mu1 <- 1; n <- 20; B <- 1000

counts <- 0
for(b in 1:B){
  set.seed(b)
  
  x <- rnorm(n, mean=mu1)[1:10]
  atest <- t.test(x)
  
  if(atest$p.value < alpha){
    counts <- counts + 1
  }
}
counts / B
## [1] 0.805

Example 10, , Validate the result using R function

alpha <- 0.05; mu=0; mu1 <- 1; n <- 20; B <- 1000
pwr.t.test(n = 10, d = mu1, sig.level = alpha, type = "one.sample", alternative = "two.sided")
## 
##      One-sample t test power calculation 
## 
##               n = 10
##               d = 1
##       sig.level = 0.05
##           power = 0.8030969
##     alternative = two.sided

Confidence interval

A \(1 - \alpha\) confidence interval for parameter \(\mu\) is an interval \(C_n = (a,b)\) where \(a = a(X_1, \ldots, X_n)\) and \(b = b(X_1, \ldots, X_n)\) are functions of the data such that \[P(\mu \in C_n) \ge 1 - \alpha\] In other words, \((a,b)\) traps \(\mu\) with probability \(1 - \alpha\). We call \(1 - \alpha\) the coverage of the confidence interval.

Interpretation:

Example 11, Confidence interval (\(\mu\) can be random)

mu <- 1
B <- 1000
n <- 10

x <- rnorm(n, mean=mu)
c(mean(x) - qt(0.975, n-1) * sd(x) / sqrt(n), mean(x) + qt(0.975, n-1) * sd(x) / sqrt(n))
## [1] 0.400803 2.219041
t.test(x)
## 
##  One Sample t-test
## 
## data:  x
## t = 3.2595, df = 9, p-value = 0.009847
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.400803 2.219041
## sample estimates:
## mean of x 
##  1.309922
counts <- 0
for(b in 1:B){
  set.seed(b)

  x <- rnorm(n, mean=mu)
  atest <- t.test(x)
  
  if(atest$conf.int[1] < mu & mu < atest$conf.int[2]){
    counts <- counts + 1
  }
}

counts/B
## [1] 0.944

Example 12, Confidence interval (using normal approximation, n=10)

Is the Confidence interval accurate in finite samples?

set.seed(32611)
B <- 1000 ## number of test
counts <- 0 ## number of accepted tests
n <- 10
mu <- 1
alpha <- 0.05
for(b in 1:B){
  x <- rnorm(n, mean=mu, sd = 1)
  lb <- mean(x) - qnorm(alpha/2, lower.tail = F) * sd(x)/sqrt(n)
  ub <- mean(x) + qnorm(alpha/2, lower.tail = F) * sd(x)/sqrt(n)
  if(lb < mu & ub > mu){
    counts <- counts + 1
  }
}
print(counts/B)
## [1] 0.909

Example 12, Confidence interval (using normal approximation, n=100)

set.seed(32611)
B <- 1000 ## number of test
counts <- 0 ## number of accepted tests
n <- 100
mu <- 1
alpha <- 0.05
for(b in 1:B){
  x <- rnorm(n, mean=mu, sd = 1)
  lb <- mean(x) - qnorm(alpha/2, lower.tail = F) * sd(x)/sqrt(n)
  ub <- mean(x) + qnorm(alpha/2, lower.tail = F) * sd(x)/sqrt(n)
  if(lb < mu & ub > mu){
    counts <- counts + 1
  }
}
print(counts/B)
## [1] 0.945

Principle for simulation study

  1. A Monte Carlo simulation is just like any other experiment
    • Factors that are of interest to vary in the experiment: sample size n, distribution of the data, magnitude of variation…
    • Each combination of factors is a separate simulation, so that many factors can lead to very large number of combinations and thus number of simulations (time consuming)
    • Results must be recorded and saved in a systematic way
    • Don’t only choose factors favorable to a method you have developed!
    • Sample size B (number of data sets) must deliver acceptable precision.
  2. Save everything!
    • Save the individual estimates in a file and then analyze
    • as opposed to computing these summaries and saving only them
    • Critical if the simulation takes a long time to run!
  3. Keep S small at first
    • Test and refine code until you are sure everything is working correctly before carrying out final production runs
    • Get an idea of how long it takes to process one data set
  4. Set a different seed for each run and keep records
    • Ensure simulation runs are independent
    • Runs may be replicated if necessary
  5. Document your code

Further simulation topics

Reference

knitr::purl("simulation.rmd", output = "simulation.R ", documentation = 2)
## 
## 
## processing file: simulation.rmd
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |..                                                               |   3%
  |                                                                       
  |....                                                             |   5%
  |                                                                       
  |.....                                                            |   8%
  |                                                                       
  |.......                                                          |  11%
  |                                                                       
  |.........                                                        |  14%
  |                                                                       
  |...........                                                      |  16%
  |                                                                       
  |............                                                     |  19%
  |                                                                       
  |..............                                                   |  22%
  |                                                                       
  |................                                                 |  24%
  |                                                                       
  |..................                                               |  27%
  |                                                                       
  |...................                                              |  30%
  |                                                                       
  |.....................                                            |  32%
  |                                                                       
  |.......................                                          |  35%
  |                                                                       
  |.........................                                        |  38%
  |                                                                       
  |..........................                                       |  41%
  |                                                                       
  |............................                                     |  43%
  |                                                                       
  |..............................                                   |  46%
  |                                                                       
  |................................                                 |  49%
  |                                                                       
  |.................................                                |  51%
  |                                                                       
  |...................................                              |  54%
  |                                                                       
  |.....................................                            |  57%
  |                                                                       
  |.......................................                          |  59%
  |                                                                       
  |........................................                         |  62%
  |                                                                       
  |..........................................                       |  65%
  |                                                                       
  |............................................                     |  68%
  |                                                                       
  |..............................................                   |  70%
  |                                                                       
  |...............................................                  |  73%
  |                                                                       
  |.................................................                |  76%
  |                                                                       
  |...................................................              |  78%
  |                                                                       
  |.....................................................            |  81%
  |                                                                       
  |......................................................           |  84%
  |                                                                       
  |........................................................         |  86%
  |                                                                       
  |..........................................................       |  89%
  |                                                                       
  |............................................................     |  92%
  |                                                                       
  |.............................................................    |  95%
  |                                                                       
  |...............................................................  |  97%
  |                                                                       
  |.................................................................| 100%
## output file: simulation.R
## [1] "simulation.R "