Zhiguang Huo (Caleb)
Wednesday November 3, 2021
## [1] 8 4
## [1] 6 4
## [1] 10  4
## [1] 1 7
## Re-seeding with the same value before each draw reproduces the draw exactly.
set.seed(32611)
sample(x = a, size = 2)
## [1] 1 7
set.seed(32611)
sample(x = a, size = 2)
## [1] 1 7
## [1] 1 7
## [1] 3 7
## [1] 8 5
## [1] 1 7
## [1] 3 7
## [1] 8 5
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.3
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## loaded via a namespace (and not attached):
##  [1] compiler_4.0.3  magrittr_2.0.1  fastmap_1.1.0   tools_4.0.3    
##  [5] htmltools_0.5.2 yaml_2.2.1      stringi_1.5.3   rmarkdown_2.6  
##  [9] knitr_1.30      stringr_1.4.0   xfun_0.19       digest_0.6.27  
## [13] rlang_0.4.10    evaluate_0.14
## [1] 1 7
##  [1]  3  7  9  5  8  1  2  4  6 10
##  [1] 3 8 9 7 2 6 5 5 7 3
##  [1] "G" "C" "A" "J" "G" "I" "I" "B" "D" "H"
For normal distribution:
| Distribution | R command | 
|---|---|
| binomial | rbinom | 
| Poisson | rpois | 
| geometric | rgeom | 
| negative binomial | rnbinom | 
| uniform | runif | 
| exponential | rexp | 
| normal | rnorm | 
| gamma | rgamma | 
| beta | rbeta | 
| student t | rt | 
| F | rf | 
| chi-squared | rchisq | 
| Weibull | rweibull | 
| log normal | rlnorm | 
| Distribution | R command | 
|---|---|
| binomial | dbinom | 
| Poisson | dpois | 
| geometric | dgeom | 
| negative binomial | dnbinom | 
| uniform | dunif | 
| exponential | dexp | 
| normal | dnorm | 
| gamma | dgamma | 
| beta | dbeta | 
| student t | dt | 
| F | df | 
| chi-squared | dchisq | 
| Weibull | dweibull | 
| log normal | dlnorm | 
| Distribution | R command | 
|---|---|
| binomial | pbinom | 
| Poisson | ppois | 
| geometric | pgeom | 
| negative binomial | pnbinom | 
| uniform | punif | 
| exponential | pexp | 
| normal | pnorm | 
| gamma | pgamma | 
| beta | pbeta | 
| student t | pt | 
| F | pf | 
| chi-squared | pchisq | 
| Weibull | pweibull | 
| log normal | plnorm | 
| Distribution | R command | 
|---|---|
| binomial | qbinom | 
| Poisson | qpois | 
| geometric | qgeom | 
| negative binomial | qnbinom | 
| uniform | qunif | 
| exponential | qexp | 
| normal | qnorm | 
| gamma | qgamma | 
| beta | qbeta | 
| student t | qt | 
| F | qf | 
| chi-squared | qchisq | 
| Weibull | qweibull | 
| log normal | qlnorm | 
\[f(x;\mu,\sigma) = \frac{1}{\sqrt{2\pi \sigma^2}} e^{-\frac{(x-\mu)^2}{2\sigma^2}}\]
## Normal densities for three (mean, sd) pairs, drawn on a shared grid.
aseq <- seq(-4, 4, 0.01)
plot(aseq, dnorm(aseq, 0, 1), type = "l", xlab = "x", ylab = "Density", lwd = 2)
lines(aseq, dnorm(aseq, 1, 1), col = 2, lwd = 2)
lines(aseq, dnorm(aseq, 0, 2), col = 3, lwd = 2)
## Legend entries are listed in drawing order; the first curve uses the
## default plotting colour.
legend(
  "topleft",
  c(
    expression(paste(mu == 0, ", ", sigma == 1, sep = " ")),
    expression(paste(mu == 1, ", ", sigma == 1, sep = " ")),
    expression(paste(mu == 0, ", ", sigma == 2, sep = " "))
  ),
  col = 1:3, lty = c(1, 1, 1), lwd = c(2, 2, 2), cex = 1, bty = "n"
)
mtext(side = 3, line = 0.5, "Normal distributions", cex = 1, font = 2)

## Grid for the plot that follows.
aseq <- seq(-4, 4, 0.01)
## Standard normal density compared with Student t densities with 10, 4 and 2
## degrees of freedom. Uses the grid `aseq` defined just before this snippet.
plot(aseq, dnorm(aseq), type = "l", xlab = "x", ylab = "Density", lwd = 2) ## same as dt(aseq, Inf)
lines(aseq, dt(aseq, 10), col = 2, lwd = 2)
lines(aseq, dt(aseq, 4), col = 3, lwd = 2)
lines(aseq, dt(aseq, 2), col = 4, lwd = 2)
legend(
  "topleft",
  c(
    expression(normal),
    expression(paste(df == 10, sep = " ")),
    expression(paste(df == 4, sep = " ")),
    expression(paste(df == 2, sep = " "))
  ),
  ## one lty/lwd value per legend entry instead of relying on recycling
  col = 1:4, lty = rep(1, 4), lwd = rep(2, 4), cex = 1, bty = "n"
)
mtext(side = 3, line = 0.5, "t distributions", cex = 1, font = 2)

## Grid for the plot that follows.
aseq <- seq(0, 4, 0.01)
## Chi-square densities with 1, 2, 5 and 10 degrees of freedom.
## Uses the grid `aseq` defined just before this snippet; ylim is fixed
## explicitly for the plot.
plot(aseq, dchisq(aseq, df = 1), type = "l", xlab = "x", ylab = "Density",
     lwd = 2, ylim = c(0, 1))
lines(aseq, dchisq(aseq, df = 2), col = 2, lwd = 2)
lines(aseq, dchisq(aseq, df = 5), col = 3, lwd = 2)
lines(aseq, dchisq(aseq, df = 10), col = 4, lwd = 2)
legend(
  "topright",
  c(
    expression({Chi^2}[(1)]),
    expression({Chi^2}[(2)]),
    expression({Chi^2}[(5)]),
    expression({Chi^2}[(10)])
  ),
  ## one lty/lwd value per legend entry instead of relying on recycling
  col = 1:4, lty = rep(1, 4), lwd = rep(2, 4), cex = 1, bty = "n"
)
mtext(side = 3, line = 0.5, "Chi square distributions", cex = 1, font = 2)
\(x \sim \chi^2(k)\)
\(E(x) = k\) and \(\mathrm{Var}(x) = 2k\)
If \(x \sim N(0,1)\), then \(x^2 \sim \chi^2(1)\)
## verification via qq-plot
## Compares the sorted squares of N(0, 1) draws with sorted chi-square(1)
## draws of the same size.
set.seed(32608)
n <- 1000
x1 <- rnorm(n)
x2 <- rchisq(n, df = 1)
## the best practice is to use the theoretical distribution
## for simplicity, we can also use the empirical distribution
s1_sorted <- sort(x1^2)
s2_sorted <- sort(x2)
plot(s1_sorted, s2_sorted, xlim = c(0, 5), ylim = c(0, 5))
\[f(k;\lambda) = \frac{\lambda^k e^{-\lambda}}{k!},\] where \(k\) is a non-negative integer.
## Poisson probabilities on k = 0, ..., 8 for lambda = 1, ..., 4, one bar chart
## per panel of a 2 x 2 layout.
aseq <- seq(0, 8, 1)
par(mfrow = c(2, 2))
for (i in 1:4) {
  counts <- dpois(aseq, i)
  names(counts) <- aseq # bar labels are the values of k
  barplot(
    counts,
    xlab = "x", ylab = "Density", lwd = 2, col = i, las = 2,
    main = bquote(paste(lambda == .(i), sep = " ")),
    ylim = c(0, 0.4) # common y-range across the four panels
  )
}
\[f(x;\alpha,\beta) = \frac{\Gamma(\alpha + \beta)}{\Gamma(\alpha) \Gamma(\beta)} x^{\alpha - 1}(1 - x)^{\beta - 1}\]
## Beta densities for six (alpha, beta) pairs on a grid strictly inside (0, 1).
aseq <- seq(0.001, 0.999, 0.001)
plot(aseq, dbeta(aseq, 0.25, 0.25), type = "l", ylim = c(0, 6),
     ylab = "Density", xlab = "Proportion (p)", lwd = 2)
lines(aseq, dbeta(aseq, 2, 2), lty = 2, lwd = 2)
lines(aseq, dbeta(aseq, 2, 5), lty = 1, col = 2, lwd = 2)
lines(aseq, dbeta(aseq, 12, 2), lty = 2, col = 2, lwd = 2)
lines(aseq, dbeta(aseq, 20, 0.75), lty = 1, col = "green", lwd = 2)
lines(aseq, dbeta(aseq, 1, 1), lty = 2, lwd = 2, col = 4)
## Legend entries, line types and colours are listed in drawing order.
legend(
  0.2, 6,
  c(
    expression(paste(alpha == 0.25, ", ", beta == 0.25)),
    expression(paste(alpha == 2, ", ", beta == 2)),
    expression(paste(alpha == 2, ", ", beta == 5)),
    expression(paste(alpha == 12, ", ", beta == 2)),
    expression(paste(alpha == 20, ", ", beta == 0.75)),
    expression(paste(alpha == 1, ", ", beta == 1))
  ),
  lty = c(1, 2, 1, 2, 1, 2), col = c(1, 1, 2, 2, "green", 4),
  cex = 1, bty = "n", lwd = rep(2, 6)
)
mtext(side = 3, line = 0.5, "Beta distributions", cex = 1, font = 2)
\(x \sim \mbox{Beta}(\alpha, \beta)\)
\(f(x;\alpha,\beta) = \frac{\Gamma(\alpha + \beta)}{\Gamma(\alpha) \Gamma(\beta)} x^{\alpha - 1}(1 - x)^{\beta - 1}\)
\(E(x) = \frac{\alpha}{\alpha + \beta}\)
When \(\alpha = \beta = 1\), the Beta distribution reduces to UNIF(0, 1).
\[f(x;k,\theta) = \frac{1}{\Gamma(k) \theta^k} x^{k-1}e^{-\frac{x}{\theta}}\]
## Gamma densities for three (shape k, scale theta) pairs.
aseq <- seq(0, 7, 0.01)
plot(aseq, dgamma(aseq, shape = 1, scale = 1), type = "l",
     xlab = "x", ylab = "Density", lwd = 2)
lines(aseq, dgamma(aseq, shape = 2, scale = 1), col = 4, lwd = 2)
lines(aseq, dgamma(aseq, shape = 4, scale = 4), col = 2, lwd = 2)
## Legend entries and colours are listed in drawing order.
legend(
  3, 1,
  c(
    expression(paste(k == 1, ", ", theta == 1, sep = " ")),
    expression(paste(k == 2, ", ", theta == 1, sep = " ")),
    expression(paste(k == 4, ", ", theta == 4, sep = " "))
  ),
  col = c(1, 4, 2), lty = c(1, 1, 1), lwd = c(2, 2, 2), cex = 1, bty = "n"
)
mtext(side = 3, line = 0.5, "Gamma distributions", cex = 1, font = 2)
\(x \sim \mbox{Gamma}(k, \theta)\)
\(f(x;k,\theta) = \frac{1}{\Gamma(k) \theta^k} x^{k-1}e^{-\frac{x}{\theta}}\)
\(y \sim \mbox{EXP}(\theta)\) if \(y \sim \mbox{Gamma}(1, \theta)\)
\(y \sim \chi^2(v)\) if \(y \sim \mbox{Gamma}(v/2, 2)\)
A positive random variable \(X\) is log-normally distributed if the logarithm of X is normally distributed, \[\ln(X) \sim N(\mu, \sigma^2)\]
## Log-normal densities for three (meanlog, sdlog) pairs.
aseq <- seq(0, 7, 0.01)
plot(aseq, dlnorm(aseq, 0.1, 2), type = "l", xlab = "x", ylab = "Density", lwd = 2)
lines(aseq, dlnorm(aseq, 2, 1), col = 4, lwd = 2)
lines(aseq, dlnorm(aseq, 0, 1), col = 2, lwd = 2)
## Legend entries and colours are listed in drawing order.
legend(
  3, 1.2,
  c(
    expression(paste(mu == 0.1, ", ", sigma == 2, sep = " ")),
    expression(paste(mu == 2, ", ", sigma == 1, sep = " ")),
    expression(paste(mu == 0, ", ", sigma == 1, sep = " "))
  ),
  col = c(1, 4, 2), lty = c(1, 1, 1), lwd = c(2, 2, 2), cex = 1, bty = "n"
)
mtext(side = 3, line = 0.5, "Lognormal distributions", cex = 1, font = 2)

## Seed for the snippet that follows.
set.seed(32608)
## Histogram of 1000 standard normal draws on the density scale, overlaid with
## the kernel density estimate ("empirical") and the N(0, 1) density
## ("theoretical").
z <- rnorm(1000)
hist(z, nclass = 50, freq = FALSE)
lines(density(z), col = 2, lwd = 2)
curve(dnorm, from = -3, to = 4, col = 4, lwd = 2, add = TRUE)
legend("topright", c("empirical", "theoretical"), col = c(2, 4), lwd = c(2, 2))

## Seed for the snippet that follows.
set.seed(32608)
## Empirical CDF of 1000 standard normal draws (red) against the N(0, 1) CDF
## (default colour).
z <- rnorm(1000)
plot(ecdf(z), ylab = "Distribution", main = "Empirical distribution",
     lwd = 2, col = "red")
curve(pnorm, from = -3, to = 3, lwd = 2, add = TRUE)
legend("topleft", legend = c("Empirical distribution", "Actual distribution"),
       lwd = 2, col = c("red", "black"))
The weak law of large numbers states that the sample average converges in probability towards the expected value.
\[\frac{1}{n} \sum_{i=1}^n X_i \rightarrow \mathbb{E}(X)\]
## [1] 0.02275013
## [1] 0.04550026## [1] 1.959964
## [1] -1.959964
\[X \sim N( \begin{pmatrix} \mu_1 \\ \mu_2 \end{pmatrix}, \begin{pmatrix} \sigma_{11} & \sigma_{12} \\ \sigma_{21} & \sigma_{22} \end{pmatrix} )\]
##      [,1] [,2]
## [1,]   10    3
## [2,]    3    2
##          [,1]     [,2]
## [1,] 9.470288 3.032126
## [2,] 3.032126 2.142785
## Scatterplots of 1000 bivariate normal draws with zero mean under six
## covariance matrices, arranged in a 2 x 3 grid with common axis limits.
mu <- c(0, 0)
Sigma1 <- diag(2)                                # identity
Sigma2 <- diag(c(1, 5))                          # variances 1 and 5, no covariance
Sigma3 <- diag(c(5, 1))                          # variances 5 and 1, no covariance
Sigma4 <- matrix(c(1, 0.8, 0.8, 1), nrow = 2)    # off-diagonal 0.8
Sigma5 <- matrix(1, nrow = 2, ncol = 2)          # all entries 1
Sigma6 <- matrix(c(1, -0.8, -0.8, 1), nrow = 2)  # off-diagonal -0.8
arange <- c(-6, 6)
par(mfrow = c(2, 3))
## The calls are written out one by one because plot() derives its axis
## labels from the call expression itself.
plot(MASS::mvrnorm(n = 1000, mu, Sigma1), xlim = arange, ylim = arange)
plot(MASS::mvrnorm(n = 1000, mu, Sigma2), xlim = arange, ylim = arange)
plot(MASS::mvrnorm(n = 1000, mu, Sigma3), xlim = arange, ylim = arange)
plot(MASS::mvrnorm(n = 1000, mu, Sigma4), xlim = arange, ylim = arange)
plot(MASS::mvrnorm(n = 1000, mu, Sigma5), xlim = arange, ylim = arange)
plot(MASS::mvrnorm(n = 1000, mu, Sigma6), xlim = arange, ylim = arange)
## Loading required package: stats4
## Loading required package: evd
## [1] 0.3602613
## [1] 1.913265
## [1] 0.8379001
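Let \(Z = F_X(X)\), where \(F_X\) is the continuous, strictly increasing CDF of \(X\). For \(y \in [0, 1]\), \[F_Z(y) = P(F_X(X) \le y) = P(X \le F_X^{-1}(y)) = F_X(F_X^{-1}(y)) = y\]
- If \(U\) is a uniform random variable that takes values in \([0, 1]\), then for \(y \in [0, 1]\), \[F_U(y) = \int_{-\infty}^y f_U(u) du = \int_0^y du = y\]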
Thus \(Z \sim UNIF(0, 1)\)
\(x \sim \mbox{Beta}(\alpha, \beta)\)
\(f(x;\alpha,\beta) = \frac{\Gamma(\alpha + \beta)}{\Gamma(\alpha) \Gamma(\beta)} x^{\alpha - 1}(1 - x)^{\beta - 1}\)
## Two samples of size n shown as side-by-side histograms:
## x1 is drawn directly with rbeta(n, 3, 1); x2 is the cube root of uniform draws.
set.seed(32611)
n <- 10000
x1 <- rbeta(n, 3, 1)
x2_0 <- runif(n)
x2 <- x2_0^(1 / 3)
par(mfrow = c(1, 2))
hist(x1, nclass = 50)
hist(x2, nclass = 50)