Zhiguang Huo (Caleb)
Wednesday Oct 12th, 2022
## [1] 4 6
## [1] 3 7
## [1] 10 5
## [1] 1 7
set.seed(32611) ## if you keep the same random seed, you will end up with the exact same result
sample(x = a, size = 2)
## [1] 1 7
set.seed(32611) ## if you keep the same random seed, you will end up with the exact same result
sample(x = a, size = 2)
## [1] 1 7
## [1] 1 7
## [1] 3 7
## [1] 8 5
## [1] 1 7
## [1] 3 7
## [1] 8 5
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## loaded via a namespace (and not attached):
## [1] compiler_4.0.3 magrittr_2.0.1 fastmap_1.1.0 tools_4.0.3
## [5] htmltools_0.5.2 yaml_2.2.1 stringi_1.5.3 rmarkdown_2.6
## [9] knitr_1.30 stringr_1.4.0 xfun_0.19 digest_0.6.27
## [13] rlang_0.4.12 evaluate_0.14
## [1] 1 7
## [1] 3 7 9 5 8 1 2 4 6 10
## [1] 3 8 9 7 2 6 5 5 7 3
## [1] "G" "C" "A" "J" "G" "I" "I" "B" "D" "H"
For normal distribution:
Distribution | R command |
---|---|
binomial | rbinom |
Poisson | rpois |
geometric | rgeom |
negative binomial | rnbinom |
uniform | runif |
exponential | rexp |
normal | rnorm |
gamma | rgamma |
beta | rbeta |
student t | rt |
F | rf |
chi-squared | rchisq |
Weibull | rweibull |
log normal | rlnorm |
Distribution | R command |
---|---|
binomial | dbinom |
Poisson | dpois |
geometric | dgeom |
negative binomial | dnbinom |
uniform | dunif |
exponential | dexp |
normal | dnorm |
gamma | dgamma |
beta | dbeta |
student t | dt |
F | df |
chi-squared | dchisq |
Weibull | dweibull |
log normal | dlnorm |
Distribution | R command |
---|---|
binomial | pbinom |
Poisson | ppois |
geometric | pgeom |
negative binomial | pnbinom |
uniform | punif |
exponential | pexp |
normal | pnorm |
gamma | pgamma |
beta | pbeta |
student t | pt |
F | pf |
chi-squared | pchisq |
Weibull | pweibull |
log normal | plnorm |
Distribution | R command |
---|---|
binomial | qbinom |
Poisson | qpois |
geometric | qgeom |
negative binomial | qnbinom |
uniform | qunif |
exponential | qexp |
normal | qnorm |
gamma | qgamma |
beta | qbeta |
student t | qt |
F | qf |
chi-squared | qchisq |
Weibull | qweibull |
log normal | qlnorm |
\[f(x;\mu,\sigma) = \frac{1}{\sqrt{2\pi \sigma^2}} e^{-\frac{(x-\mu)^2}{2\sigma^2}}\]
aseq <- seq(-4,4,.01)
plot(aseq,dnorm(aseq, 0, 1),type='l', xlab='x', ylab='Density', lwd=2)
lines(aseq,dnorm(aseq, 1, 1),col=2, lwd=2)
lines(aseq,dnorm(aseq,0, 2),col=3, lwd=2)
legend("topleft",c(expression(paste(mu==0, ", " , sigma==1 ,sep=' ')),
expression(paste(mu==1, ", " , sigma==1 ,sep=' ')),
expression(paste(mu==0, ", " , sigma==2 ,sep=' '))),
col=1:3, lty=c(1,1,1), lwd=c(2,2,2), cex=1, bty='n')
mtext(side=3,line=.5,'Normal distributions',cex=1, font=2)
aseq <- seq(-4,4,.01)
plot(aseq,dnorm(aseq),type='l', xlab='x', ylab='Density', lwd=2) ## same as dt(aseq, Inf)
lines(aseq,dt(aseq,10),col=2, lwd=2)
lines(aseq,dt(aseq,4),col=3, lwd=2)
lines(aseq,dt(aseq,2),col=4, lwd=2)
legend("topleft",c(expression(normal), expression(paste(df==10,sep=' ')),
expression(paste(df==4,sep=' ')),
expression(paste(df==2,sep=' '))),
col=1:4, lty=c(1,1,1), lwd=c(2,2,2), cex=1, bty='n')
mtext(side=3,line=.5,'t distributions',cex=1, font=2)
aseq <- seq(0,4,.01)
plot(aseq,dchisq(aseq, df = 1),type='l', xlab='x', ylab='Density', lwd=2, ylim = c(0,1))
lines(aseq,dchisq(aseq,df = 2),col=2, lwd=2)
lines(aseq,dchisq(aseq,df = 5),col=3, lwd=2)
lines(aseq,dchisq(aseq,df = 10),col=4, lwd=2)
legend("topright",c(expression({Chi^2}[(1)]), expression({Chi^2}[(2)]),
expression({Chi^2}[(5)]),
expression({Chi^2}[(10)])
),
col=1:4, lty=c(1,1,1), lwd=c(2,2,2), cex=1, bty='n')
mtext(side=3,line=.5,'Chi square distributions',cex=1, font=2)
\(x \sim \chi^2(k)\)
\(E(x) = 2k\)
If \(x \sim N(0,1)\), then \(x^2 \sim \chi^2(1)\)
## verification via qq-plot
set.seed(32608)
n <- 1000
x1 <- rnorm(n)
x2 <- rchisq(n,df = 1)
## the best practice is to use the theoretical distribution
## for simplicity, we can also use the emperical distribution
s1_sorted <- sort(x1^2)
s2_sorted <- sort(x2)
plot(s1_sorted, s2_sorted, xlim = c(0,5), ylim = c(0,5))
\[f(k;\lambda) = \frac{\lambda^k e^{-\lambda}}{k!},\] where \(k\) is non negative integer.
\[f(x;\alpha,\beta) = \frac{\Gamma(\alpha + \beta)}{\Gamma(\alpha) \Gamma(\beta)} x^{\alpha - 1}(1 - x)^{\beta - 1}\]
aseq <- seq(.001,.999,.001)
plot(aseq,dbeta(aseq,.25,.25), type='l', ylim=c(0,6), ylab='Density', xlab='Proportion (p)',lwd=2)
lines(aseq, dbeta(aseq,2,2),lty=2,lwd=2)
lines(aseq, dbeta(aseq,2,5),lty=1,col=2,lwd=2)
lines(aseq, dbeta(aseq,12,2),lty=2,col=2,lwd=2)
lines(aseq, dbeta(aseq,20,.75),lty=1,col='green',lwd=2)
lines(aseq, dbeta(aseq,1,1),lty=2,lwd=2, col=4)
legend(.2,6,c(expression(paste(alpha==.25,', ', beta==.25)), expression(paste(alpha==2,', ',beta==2)), expression(paste(alpha==2,', ', beta==5)), expression(paste(alpha==12,', ',beta==2)), expression(paste(alpha==20,', ',beta==.75)), expression(paste(alpha==1,', ', beta==1))), lty=c(1,2,1,2,1,2), col=c(1,1,2,2,'green',4), cex=1,bty='n',lwd=rep(2,6))
mtext(side=3,line=.5,'Beta distributions',cex=1, font=2)
\(x \sim \mbox{Beta}(\alpha, \beta)\)
\(f(x;\alpha,\beta) = \frac{\Gamma(\alpha + \beta)}{\Gamma(\alpha) \Gamma(\beta)} x^{\alpha - 1}(1 - x)^{\beta - 1}\)
\(E(x) = \frac{\alpha}{\alpha + \beta}\)
when \(\alpha = \beta = 1\), Beta distribution reduces to UNIF(0, 1)
## [1] 0.02275013
## [1] 0.04550026
## [1] 1.959964
## [1] -1.959964
##
## Welch Two Sample t-test
##
## data: a and b
## t = -3.3236, df = 17.48, p-value = 0.0039
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -2.2295315 -0.5002657
## sample estimates:
## mean of x mean of y
## -0.1733548 1.1915438
##
## Paired t-test
##
## data: a and b
## t = -2.5027, df = 9, p-value = 0.03371
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -2.5985937 -0.1312035
## sample estimates:
## mean of the differences
## -1.364899
##
## One Sample t-test
##
## data: a
## t = -0.65621, df = 9, p-value = 0.5281
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.7709621 0.4242526
## sample estimates:
## mean of x
## -0.1733548
##
## Wilcoxon rank sum exact test
##
## data: a and b
## W = 13, p-value = 0.003886
## alternative hypothesis: true location shift is not equal to 0
##
## Wilcoxon signed rank exact test
##
## data: a and b
## V = 5, p-value = 0.01953
## alternative hypothesis: true location shift is not equal to 0
## Myocardial Infarction
## Treatment Yes No
## Placebo 189 10845
## Aspirin 104 10933
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: X
## X-squared = 24.429, df = 1, p-value = 7.71e-07
## Myocardial Infarction
## Treatment Yes No
## Placebo 189 10845
## Aspirin 104 10933
##
## Fisher's Exact Test for Count Data
##
## data: X
## p-value = 5.033e-07
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 1.432396 2.353927
## sample estimates:
## odds ratio
## 1.831993
set.seed(32611)
a <- rnorm(10,0,1)
b <- rnorm(10,1,1)
cor.test(a,b) ## parametric test: Gaussian assumption
##
## Pearson's product-moment correlation
##
## data: a and b
## t = -3.4699, df = 8, p-value = 0.008444
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.9440264 -0.2841809
## sample estimates:
## cor
## -0.7751172
##
## Spearman's rank correlation rho
##
## data: a and b
## S = 292, p-value = 0.01367
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.769697
##
## Two-sample Kolmogorov-Smirnov test
##
## data: x and y
## D = 0.38, p-value = 0.001315
## alternative hypothesis: two-sided
z <- runif(50)
ks.test(x,z)
Basic syntax for lm()
##
## Call:
## lm(formula = mpg ~ cyl, data = mtcars)
##
## Coefficients:
## (Intercept) cyl
## 37.885 -2.876
##
## Call:
## lm(formula = mpg ~ cyl, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.9814 -2.1185 0.2217 1.0717 7.5186
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 37.8846 2.0738 18.27 < 2e-16 ***
## cyl -2.8758 0.3224 -8.92 6.11e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.206 on 30 degrees of freedom
## Multiple R-squared: 0.7262, Adjusted R-squared: 0.7171
## F-statistic: 79.56 on 1 and 30 DF, p-value: 6.113e-10
##
## Call:
## lm(formula = mpg ~ cyl + disp, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.4213 -2.1722 -0.6362 1.1899 7.0516
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 34.66099 2.54700 13.609 4.02e-14 ***
## cyl -1.58728 0.71184 -2.230 0.0337 *
## disp -0.02058 0.01026 -2.007 0.0542 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.055 on 29 degrees of freedom
## Multiple R-squared: 0.7596, Adjusted R-squared: 0.743
## F-statistic: 45.81 on 2 and 29 DF, p-value: 1.058e-09
##
## Call:
## lm(formula = mpg ~ ., data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4506 -1.6044 -0.1196 1.2193 4.6271
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 12.30337 18.71788 0.657 0.5181
## cyl -0.11144 1.04502 -0.107 0.9161
## disp 0.01334 0.01786 0.747 0.4635
## hp -0.02148 0.02177 -0.987 0.3350
## drat 0.78711 1.63537 0.481 0.6353
## wt -3.71530 1.89441 -1.961 0.0633 .
## qsec 0.82104 0.73084 1.123 0.2739
## vs 0.31776 2.10451 0.151 0.8814
## am 2.52023 2.05665 1.225 0.2340
## gear 0.65541 1.49326 0.439 0.6652
## carb -0.19942 0.82875 -0.241 0.8122
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.65 on 21 degrees of freedom
## Multiple R-squared: 0.869, Adjusted R-squared: 0.8066
## F-statistic: 13.93 on 10 and 21 DF, p-value: 3.793e-07