Zhiguang Huo (Caleb)
Monday October 30, 2017
Simulation:
A typical Monte Carlo simulation involves the following procedure:
1. Specify the data-generating model and the true parameter value \(\theta\).
2. Generate \(B\) independent datasets from the model.
3. Compute the estimator of interest on each dataset.
4. Use the \(B\) realized values to approximate the estimator's sampling distribution, e.g., its bias, variance, and MSE.
Example: consider estimators for a mean parameter \(\theta\). We will have more concrete examples later.
Note: the 20% trimmed mean is the average of the data after discarding the smallest 10% and the largest 10% of the observations.
E.g., for 3, 2, 4, 7, 1, 6, 5, 8, 9, 10, we discard 1 and 10 and average the remaining eight values.
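In R, mean(x, trim = 0.1) computes exactly this, trimming 10% of the observations from each tail; a minimal check on the toy data above:
x <- c(3, 2, 4, 7, 1, 6, 5, 8, 9, 10)
mean(x, trim = 0.1) ## drops 1 and 10, then averages 2,...,9: returns 5.5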
Three candidate estimators \(T\): the mean, the median, and the 20% trimmed mean.
\[T^{mean} = \frac{1}{n}\sum_{i=1}^n Y_i\]
Its variance can be calculated analytically: \(Var(T^{mean}) = \sigma^2/n\).
\[T^{median} = Median_{i=1}^n Y_i\]
Its variance is hard to calculate analytically, but it can be approximated by simulation.
\[T^{mean20\%} = \frac{1}{0.8n}\sum_{i=0.1n+1}^{0.9n} Y_{(i)},\] where \(Y_{(1)} \le \cdots \le Y_{(n)}\) are the order statistics.
Its variance is also hard to calculate analytically, but it can be approximated by simulation.
Since it is sometimes hard to evaluate an estimator analytically, we can evaluate it using simulation studies.
## Simulate X ~ N(mu = 1, sd = 1), n = 20 per dataset, B = 1000 datasets
mu <- 1; B <- 1000; n <- 20
T1 <- numeric(B); T2 <- numeric(B); T3 <- numeric(B)
for(b in 1:B){
set.seed(b)
x <- rnorm(n, mean = mu)
T1[b] <- mean(x) ## sample mean
T2[b] <- median(x) ## sample median
T3[b] <- mean(x, trim = 0.1) ## 20% trimmed mean
}
bias_T1 <- abs(mean(T1) - mu); bias_T2 <- abs(mean(T2) - mu); bias_T3 <- abs(mean(T3) - mu)
bias <- c(bias_T1, bias_T2, bias_T3)
var_T1 <- var(T1); var_T2 <- var(T2); var_T3 <- var(T3)
vars <- c(var_T1, var_T2, var_T3)
MSE1 <- bias_T1^2 + var_T1; MSE2 <- bias_T2^2 + var_T2; MSE3 <- bias_T3^2 + var_T3 ## MSE = bias^2 + variance
MSE <- c(MSE1, MSE2, MSE3)
RE <- MSE1/MSE ## relative efficiency: MSE of the mean divided by MSE of each estimator
comparisonTable <- rbind(bias, vars, MSE, RE)
colnames(comparisonTable) <- c("mean", "median", "mean 20% trim")
comparisonTable
## mean median mean 20% trim
## bias 3.832736e-05 0.006673072 0.000688909
## vars 5.174830e-02 0.074680145 0.054550469
## MSE 5.174830e-02 0.074724675 0.054550944
## RE 1.000000e+00 0.692519633 0.948623449
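A sanity check for the median's RE: under normality the asymptotic relative efficiency of the sample median to the sample mean is \[ARE(T^{median}, T^{mean}) = \frac{2}{\pi} \approx 0.64,\] consistent with the simulated RE of about 0.69 at \(n = 20\).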
## Repeat with X ~ Exp(rate = 1); the true mean is 1/rate = 1
mu <- 1; B <- 1000; n <- 20
T1 <- numeric(B); T2 <- numeric(B); T3 <- numeric(B)
for(b in 1:B){
set.seed(b)
x <- rexp(n, rate = mu)
T1[b] <- mean(x)
T2[b] <- median(x)
T3[b] <- mean(x, trim = 0.1)
}
bias_T1 <- abs(mean(T1) - mu); bias_T2 <- abs(mean(T2) - mu); bias_T3 <- abs(mean(T3) - mu)
bias <- c(bias_T1, bias_T2, bias_T3)
var_T1 <- var(T1); var_T2 <- var(T2); var_T3 <- var(T3)
vars <- c(var_T1, var_T2, var_T3)
MSE1 <- bias_T1^2 + var_T1; MSE2 <- bias_T2^2 + var_T2; MSE3 <- bias_T3^2 + var_T3
MSE <- c(MSE1, MSE2, MSE3)
comparisonTable <- rbind(bias, vars, MSE)
colnames(comparisonTable) <- c("mean", "median", "mean 20% trim")
comparisonTable
## mean median mean 20% trim
## bias 0.01040018 0.26539760 0.13243138
## vars 0.05021121 0.05052217 0.04247023
## MSE 0.05031938 0.12095806 0.06000830
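The median is now severely biased for the mean: for \(X \sim Exp(\lambda)\), \[Median(X) = \frac{\ln 2}{\lambda} \approx 0.69 \ne \frac{1}{\lambda} = 1,\] so the sample median does not target \(\mu = 1\) even as \(n \to \infty\). The trimmed mean is less affected, as the table above shows.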
Assume \(X \sim Cauchy(\mu,\gamma)\) with \(\mu = 0\) and \(\gamma = 1\): \[f(x; \mu, \gamma) = \frac{1}{\pi \gamma \left[1 + \left(\frac{x - \mu}{\gamma}\right)^2\right]},\] which reduces to \(\frac{1}{\pi (1 + x^2)}\) here. Note that the Cauchy distribution has no finite mean, so the sample mean is not even consistent for the location parameter \(\mu\).
## Repeat with X ~ Cauchy(location = 0, scale = 1)
mu <- 0; B <- 1000; n <- 20
T1 <- numeric(B); T2 <- numeric(B); T3 <- numeric(B)
for(b in 1:B){
set.seed(b)
x <- rcauchy(n, location = mu)
T1[b] <- mean(x)
T2[b] <- median(x)
T3[b] <- mean(x, trim = 0.1)
}
bias_T1 <- abs(mean(T1) - mu); bias_T2 <- abs(mean(T2) - mu); bias_T3 <- abs(mean(T3) - mu)
bias <- c(bias_T1, bias_T2, bias_T3)
var_T1 <- var(T1); var_T2 <- var(T2); var_T3 <- var(T3)
vars <- c(var_T1, var_T2, var_T3)
MSE1 <- bias_T1^2 + var_T1; MSE2 <- bias_T2^2 + var_T2; MSE3 <- bias_T3^2 + var_T3
MSE <- c(MSE1, MSE2, MSE3)
comparisonTable <- rbind(bias, vars, MSE)
colnames(comparisonTable) <- c("mean", "median", "mean 20% trim")
comparisonTable
## mean median mean 20% trim
## bias 0.1187952 0.005811866 0.00296929
## vars 742.5037597 0.146730729 0.37387966
## MSE 742.5178721 0.146764506 0.37388847
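The enormous variance of the sample mean reflects the missing Cauchy mean. As a quick illustration (a sketch; the seed and sample size are arbitrary choices), the running mean of Cauchy draws never settles down:
set.seed(32611) ## arbitrary seed, for reproducibility
x <- rcauchy(10000)
runningMean <- cumsum(x) / seq_along(x)
plot(runningMean, type = "l", xlab = "sample size", ylab = "running mean") ## occasional extreme draws keep shifting the running mean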
A uniformly minimum-variance unbiased estimator (UMVUE) is an unbiased estimator whose variance is no larger than that of any other unbiased estimator, for all possible values of the parameter.
BLUE: under the Gauss-Markov assumptions, the best linear unbiased estimator (BLUE) of the coefficients in a linear model is the ordinary least squares (OLS) estimator,
\[\hat{\beta}^{ols} = \arg \min_\beta \|y - X\beta\|^2_2\]
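When \(X\) has full column rank, this minimization has the familiar closed form \[\hat{\beta}^{ols} = (X^\top X)^{-1} X^\top y.\]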
Modern statistics cares about both the bias and the variance of an estimator, rather than restricting attention to unbiased estimators.
Lasso estimator
\[\hat{\beta}^{lasso} = \arg \min_\beta \|y - X\beta\|^2_2 + \lambda \|\beta\|_1\]
Model setup: \(Y = X\beta + \varepsilon\) with \(n = 30\), an intercept plus \(p = 5\) predictors, true coefficients \(\beta = (1, 3, 5, 0, 0, 0)\), and \(\varepsilon \sim N(0, 3^2)\); the design matrix \(X\) is held fixed across simulations.
Goal: use simulation to compare the bias, variance, and MSE of the OLS and lasso estimators as the tuning parameter \(\lambda\) varies. Setting \(\lambda = 0\) recovers OLS (unbiased, larger variance); increasing \(\lambda\) shrinks the coefficients toward zero (more bias, less variance).
library(lars)
## Loaded lars 1.2
beta0 <- 1; beta1 <- 3; beta2 <- 5
beta3 <- beta4 <- beta5 <- 0
beta <- c(beta0,beta1,beta2,beta3,beta4,beta5) ## true coefficients: intercept 1, two signals (3, 5), three zeros
p <- 5; n <- 30
set.seed(32611)
X0 <- matrix(rnorm(p*n),nrow=n)
X <- cbind(1, X0) ## X fixed across simulations; first column is the intercept
Y0 <- X %*% as.matrix(beta, ncol=1) ## noiseless mean E(Y)
B <- 1000
beta_ols <- replicate(n = B, expr = list())
beta_lasso <- replicate(n = B, expr = list())
for(b in 1:B){
set.seed(b)
error <- rnorm(length(Y0), sd = 3)
Y <- Y0 + error
abeta_ols <- lm(Y~X0)$coefficients ## OLS estimates (lm adds its own intercept)
beta_ols[[b]] <- abeta_ols
alars <- lars(x = X, y = Y, intercept=F) ## full lasso path; intercept enters via the column of 1s in X
beta_lasso[[b]] <- alars
}
beta_ols_hat <- Reduce("+",beta_ols)/length(beta_ols) ## Monte Carlo average of the OLS estimates
beta_ols_bias <- sqrt(sum((beta_ols_hat - beta)^2)) ## L2 norm of the bias vector
beta_ols_variance <- sum(Reduce("+", lapply(beta_ols,function(x) (x - beta_ols_hat)^2))/(length(beta_ols) - 1)) ## total variance across coefficients
beta_ols_MSE <- beta_ols_bias^2 + beta_ols_variance ## MSE = bias^2 + variance
lambdas <- seq(0,10,0.1) ## grid of lasso tuning parameters
lassoCoef <- lapply(beta_lasso, function(alars){
alassoCoef <- t(coef(alars, s=lambdas, mode="lambda"))
alassoCoef
} )
beta_lasso_hat <- Reduce("+",lassoCoef)/length(lassoCoef)
beta_lasso_bias <- sqrt(colSums((beta_lasso_hat - beta)^2))
beta_lasso_variance <- colSums(Reduce("+", lapply(lassoCoef,function(x) (x - beta_lasso_hat)^2))/(length(lassoCoef) - 1))
beta_lasso_MSE <- beta_lasso_bias^2 + beta_lasso_variance ## MSE = bias^2 + variance
par(mfrow=c(2,2))
plot(x = lambdas, y = beta_lasso_bias, type = "l", xlab="lambda", main="bias")
abline(h = beta_ols_bias, lty=2)
legend("topleft", legend = c("lasso", "OLS"), col = 1, lty = c(1,2))
plot(x = lambdas, y = beta_lasso_variance, col=4, type = "l", xlab="lambda", main="variance")
abline(h = beta_ols_variance, lty=2, col=4)
legend("bottomleft", legend = c("lasso", "OLS"), col = 4, lty = c(1,2))
plot(x = lambdas, y = beta_lasso_MSE, col=2, type = "l", xlab="lambda", main="MSE")
abline(h = beta_ols_MSE, lty=2, col=2)
legend("topleft", legend = c("lasso", "OLS"), col = 2, lty = c(1,2))
plot(x = lambdas, y = beta_lasso_MSE, ylim = c(min(beta_lasso_bias^2), max(beta_lasso_MSE)),
type="n", xlab="lambda", main="MSE = bias^2 + variance")
lines(x = lambdas, y = beta_lasso_MSE, col=2)
lines(x = lambdas, y = beta_lasso_variance, col=4)
lines(x = lambdas, y = beta_lasso_bias^2, col=1) ## plot bias^2 so the three curves satisfy MSE = bias^2 + variance
legend("bottomright", legend = c("bias^2", "variance", "MSE"), col = c(1,4,2), lty = 1)
Consider the following tests of \(H_0: \mu = 0\) versus \(H_A: \mu \ne 0\): the one-sample t-test (t.test) and the Wilcoxon signed-rank test (wilcox.test).
Simulation for sizes of hypothesis tests: the size of a test is the probability of rejecting \(H_0\) when \(H_0\) is true; for a well-calibrated test it should match the nominal level \(\alpha\).
Consider the following simulation setting: \(X \sim N(\mu, 1)\) with \(\mu = 0\) (so \(H_0\) holds), \(n = 20\), \(B = 1000\) repetitions, and \(\alpha = 0.05\).
## Empirical size of the one-sample t-test: proportion of rejections when H0 is true
alpha <- 0.05; mu=0; n <- 20; B <- 1000
counts <- 0
for(b in 1:B){
set.seed(b)
x <- rnorm(n, mean=mu)
atest <- t.test(x)
if(atest$p.value < alpha){
counts <- counts + 1
}
}
counts/B
## [1] 0.05
## Empirical size of the Wilcoxon signed-rank test
alpha <- 0.05; mu=0; n <- 20; B <- 1000
counts <- 0
for(b in 1:B){
set.seed(b)
x <- rnorm(n, mean=mu)
atest <- wilcox.test(x)
if(atest$p.value < alpha){
counts <- counts + 1
}
}
counts/B
## [1] 0.047
## Empirical size of the t-test with effective sample size n = 10
alpha <- 0.05; mu=0; n <- 20; B <- 1000
counts <- 0
for(b in 1:B){
set.seed(b)
x <- rnorm(n, mean=mu)[1:10] ## keep only the first 10 of the 20 draws, i.e., n = 10
atest <- t.test(x)
if(atest$p.value < alpha){
counts <- counts + 1
}
}
counts/B
## [1] 0.056
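How close to 0.05 should these empirical sizes be? A rejection indicator is Bernoulli(\(\alpha\)) under \(H_0\), so the Monte Carlo standard error of an estimated size is \(\sqrt{\alpha(1-\alpha)/B}\). A quick check:
sqrt(0.05 * 0.95 / 1000) ## about 0.007, so 0.047, 0.05, and 0.056 are all within noise of the nominal 0.05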
## Empirical power of the one-sample t-test when the true mean is mu1 = 1
alpha <- 0.05; mu=0; mu1 <- 1; n <- 20; B <- 1000
counts <- 0
for(b in 1:B){
set.seed(b)
x <- rnorm(n, mean=mu1)
atest <- t.test(x)
if(atest$p.value < alpha){
counts <- counts + 1
}
}
counts / B
## [1] 0.985
library(pwr) ## analytic power calculations, to validate the simulation results
alpha <- 0.05; mu=0; mu1 <- 1; n <- 20; B <- 1000
pwr.t.test(n = n, d = mu1, sig.level = alpha, type = "one.sample", alternative = "two.sided")
##
## One-sample t test power calculation
##
## n = 20
## d = 1
## sig.level = 0.05
## power = 0.9885913
## alternative = two.sided
## Empirical power of the Wilcoxon signed-rank test when the true mean is mu1 = 1
alpha <- 0.05; mu=0; mu1 <- 1; n <- 20; B <- 1000
counts <- 0
for(b in 1:B){
set.seed(b)
x <- rnorm(n, mean=mu1)
atest <- wilcox.test(x)
if(atest$p.value < alpha){
counts <- counts + 1
}
}
counts / B
## [1] 0.979
## Empirical power of the t-test with effective sample size n = 10
alpha <- 0.05; mu=0; mu1 <- 1; n <- 20; B <- 1000
counts <- 0
for(b in 1:B){
set.seed(b)
x <- rnorm(n, mean=mu1)[1:10] ## keep only the first 10 of the 20 draws, i.e., n = 10
atest <- t.test(x)
if(atest$p.value < alpha){
counts <- counts + 1
}
}
counts / B
## [1] 0.805
alpha <- 0.05; mu=0; mu1 <- 1; n <- 20; B <- 1000
pwr.t.test(n = 10, d = mu1, sig.level = alpha, type = "one.sample", alternative = "two.sided")
##
## One-sample t test power calculation
##
## n = 10
## d = 1
## sig.level = 0.05
## power = 0.8030969
## alternative = two.sided
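The same analytic tool can trace power as a function of the effect size. A minimal sketch using pwr.t.test over a grid of \(d\) values (the grid itself is an arbitrary choice):
ds <- seq(0.2, 1.5, by = 0.1) ## grid of effect sizes (arbitrary)
power_n20 <- sapply(ds, function(d) pwr.t.test(n = 20, d = d, sig.level = 0.05, type = "one.sample")$power)
plot(ds, power_n20, type = "b", xlab = "effect size d", ylab = "power") ## power increases monotonically in d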
A \(1 - \alpha\) confidence interval for parameter \(\mu\) is an interval \(C_n = (a,b)\) where \(a = a(X_1, \ldots, X_n)\) and \(b = b(X_1, \ldots, X_n)\) are functions of the data such that \[P(\mu \in C_n) \ge 1 - \alpha\] In other words, \((a,b)\) traps \(\mu\) with probability \(1 - \alpha\). We call \(1 - \alpha\) the coverage of the confidence interval.
Interpretation: the confidence level describes the long-run behavior of the procedure. If we repeated the experiment many times and constructed an interval each time, about \(1 - \alpha\) of those intervals would contain the true \(\mu\); it is not the probability that \(\mu\) falls in one particular realized interval.
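For normal data with unknown variance, the interval used below is the standard \(t\) interval: \[\left(\bar{X} - t_{1-\alpha/2,\,n-1} \frac{\hat{\sigma}}{\sqrt{n}},\; \bar{X} + t_{1-\alpha/2,\,n-1} \frac{\hat{\sigma}}{\sqrt{n}}\right)\]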
mu <- 1
B <- 1000
n <- 10
x <- rnorm(n, mean=mu)
## 95% t interval computed by hand; it matches t.test below
c(mean(x) - qt(0.975, n-1) * sd(x) / sqrt(n), mean(x) + qt(0.975, n-1) * sd(x) / sqrt(n))
## [1] 0.400803 2.219041
t.test(x)
##
## One Sample t-test
##
## data: x
## t = 3.2595, df = 9, p-value = 0.009847
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.400803 2.219041
## sample estimates:
## mean of x
## 1.309922
counts <- 0 ## number of intervals that cover the true mu
for(b in 1:B){
set.seed(b)
x <- rnorm(n, mean=mu)
atest <- t.test(x)
if(atest$conf.int[1] < mu & mu < atest$conf.int[2]){
counts <- counts + 1
}
}
counts/B
## [1] 0.944
The empirical coverage 0.944 is within Monte Carlo error of the nominal 0.95, as expected: under normality the t interval is exact. Is the confidence interval still accurate in finite samples if we use the normal (z) quantile instead of the t quantile?
set.seed(32611)
B <- 1000 ## number of simulated datasets
counts <- 0 ## number of intervals that cover the true mu
n <- 10
mu <- 1
alpha <- 0.05
for(b in 1:B){
x <- rnorm(n, mean=mu, sd = 1)
lb <- mean(x) - qnorm(alpha/2, lower.tail = F) * sd(x)/sqrt(n) ## z interval: normal quantile instead of t
ub <- mean(x) + qnorm(alpha/2, lower.tail = F) * sd(x)/sqrt(n)
if(lb < mu & ub > mu){
counts <- counts + 1
}
}
print(counts/B)
## [1] 0.909
With \(n = 10\), the z interval under-covers (0.909 < 0.95) because the normal quantile 1.96 is smaller than the corresponding t quantile \(t_{0.975, 9} \approx 2.26\). Behavior in large samples:
\(95\%\) confidence interval: \((\bar{X} - 1.96 \times \frac{\hat{\sigma}}{\sqrt{n}}, \bar{X} + 1.96 \times \frac{\hat{\sigma}}{\sqrt{n}})\)
set.seed(32611)
B <- 1000 ## number of simulated datasets
counts <- 0 ## number of intervals that cover the true mu
n <- 100
mu <- 1
alpha <- 0.05
for(b in 1:B){
x <- rnorm(n, mean=mu, sd = 1)
lb <- mean(x) - qnorm(alpha/2, lower.tail = F) * sd(x)/sqrt(n)
ub <- mean(x) + qnorm(alpha/2, lower.tail = F) * sd(x)/sqrt(n)
if(lb < mu & ub > mu){
counts <- counts + 1
}
}
print(counts/B)
## [1] 0.945
knitr::purl("simulation.rmd", output = "simulation.R", documentation = 2)
## processing file: simulation.rmd
## output file: simulation.R
## [1] "simulation.R"