Zhiguang Huo (Caleb)
Monday September 6, 2017
a <- 1:1000000
### version A, loop
start <- Sys.time()
meanA <- 0
for(i in seq_along(a)){
meanA <- meanA + a[i]/length(a)
}
end <- Sys.time()
end - start
## Time difference of 1.313023 secs
meanA
## [1] 500000.5
### version B, vectorized calculation
start <- Sys.time()
mean(a)
## [1] 500000.5
end <- Sys.time()
end - start
## Time difference of 0.005419016 secs
a <- 1:1000000
b <- 1000000:1
### version A, loop
start <- Sys.time()
result <- numeric(length(a)) ## create a vector with length length(a) and all elements 0
for(i in seq_along(a)){
result[i] <- a[i] + b[i]
}
end <- Sys.time()
end - start
## Time difference of 2.219551 secs
### version B, vectorized calculation
start <- Sys.time()
result <- a + b
end <- Sys.time()
end - start
## Time difference of 0.01295304 secs
a <- c(1.1,3.4,9.5)
b <- c(9,2,0.8)
a + b ## vector addition
## [1] 10.1 5.4 10.3
a * b ## vector multiplication
## [1] 9.9 6.8 7.6
a ^ b ## 1.1^9, 3.4^2, 9.5^0.8
## [1] 2.357948 11.560000 6.055903
a <- 1:8
a + 2
## [1] 3 4 5 6 7 8 9 10
a + c(0,1)
## [1] 1 3 3 5 5 7 7 9
a + c(1,2,3) ## warming message
## Warning in a + c(1, 2, 3): longer object length is not a multiple of
## shorter object length
## [1] 2 4 6 5 7 9 8 10
a <- seq(1,3,1)
sin(a)
## [1] 0.8414710 0.9092974 0.1411200
tan(a)
## [1] 1.5574077 -2.1850399 -0.1425465
log(a,base = 10)
## [1] 0.0000000 0.3010300 0.4771213
log10(a)
## [1] 0.0000000 0.3010300 0.4771213
exp(a)
## [1] 2.718282 7.389056 20.085537
plot(x = 1:10, y = 1:10, col=1:10)
plot(x = 1:10, y = 1:10, col=1)
n <- 5
a_int <- integer(n)
a_int
## [1] 0 0 0 0 0
b_int <- rep(0, n)
b_int
## [1] 0 0 0 0 0
c_int <- replicate(5,0)
c_int
## [1] 0 0 0 0 0
a_double <- double(n)
a_double
## [1] 0 0 0 0 0
a_char <- character(n)
a_char
## [1] "" "" "" "" ""
a_logical <- logical(n)
a_logical
## [1] FALSE FALSE FALSE FALSE FALSE
a <- seq(from=10, to=100, by=2)
a
## [1] 10 12 14 16 18 20 22 24 26 28 30 32 34 36 38 40 42
## [18] 44 46 48 50 52 54 56 58 60 62 64 66 68 70 72 74 76
## [35] 78 80 82 84 86 88 90 92 94 96 98 100
b <- seq(from=10, to=100, length=10)
b
## [1] 10 20 30 40 50 60 70 80 90 100
c <- seq(from=10, to=100, along.with=1:10)
c
## [1] 10 20 30 40 50 60 70 80 90 100
set.seed(32611) ## set a seed number such that the random numbers will keep the same
rnorm(n = 5, mean = 0, sd = 1)
## [1] -0.1023726 -1.8869039 -1.0586563 1.0227599 0.3982516
set.seed(32611)
rnorm(n = 5, mean = 0:4, sd = 1)
## [1] -0.1023726 -0.8869039 0.9413437 4.0227599 4.3982516
set.seed(32611)
sample(1:10,1)
## [1] 5
set.seed(32611)
runif(n = 4)
## [1] 0.45923048 0.37486058 0.02958663 0.51775341
replicate(4,list())
## [[1]]
## list()
##
## [[2]]
## list()
##
## [[3]]
## list()
##
## [[4]]
## list()
replicate(5, runif(sample(1:5,1)))
## [[1]]
## [1] 0.02609954
##
## [[2]]
## [1] 0.4031372 0.6547776 0.5071504 0.2504906 0.9807351
##
## [[3]]
## [1] 0.8903292 0.5409666 0.5041831
##
## [[4]]
## [1] 0.7552580 0.5850660 0.9150680 0.2715686
##
## [[5]]
## [1] 0.8858161 0.1869268
a <- matrix(1:6,nrow=3,ncol=2)
a ## matrix will be filled by column by default.
## [,1] [,2]
## [1,] 1 4
## [2,] 2 5
## [3,] 3 6
b <- matrix(6:1, nrow=3, ncol=2)
a * b ## similar to vector, matrix algebra will be done element-wise.
## [,1] [,2]
## [1,] 6 12
## [2,] 10 10
## [3,] 12 6
a + c(1,2,3) ## if a matrix add a vector, add by column
## [,1] [,2]
## [1,] 2 5
## [2,] 4 7
## [3,] 6 9
a <- matrix(1:6,nrow=3,ncol=2)
a
## [,1] [,2]
## [1,] 1 4
## [2,] 2 5
## [3,] 3 6
apply(a,1,sum) ## for each row, equivalent to rowSums
## [1] 5 7 9
apply(a,1,var) ## for each row, calculate the variance
## [1] 4.5 4.5 4.5
apply(a,1,function(x) x^2) ## can also use an anonymous function, note that R will always fill the result by column.
## [,1] [,2] [,3]
## [1,] 1 4 9
## [2,] 16 25 36
apply(a,2,sum) ## for each column, equivalent to colSums
## [1] 6 15
colSums(a)
## [1] 6 15
apply(a,2,mean) ## for each column, equivalent to colMeans
## [1] 2 5
apply(a,2,function(x) min(x)) ## equivalent to apply(a,2,min)
## [1] 1 4
apply(a,2,min)
## [1] 1 4
set.seed(32611)
(x <- matrix(rnorm(6,0,4),nrow=2))
## [,1] [,2] [,3]
## [1,] -0.4094903 -4.234625 1.593006
## [2,] -7.5476157 4.091040 -2.691786
x1 <- sweep(x,1,apply(x,1,min),'-')
x1
## [,1] [,2] [,3]
## [1,] 3.825135 0.00000 5.827631
## [2,] 0.000000 11.63866 4.855829
x2 <- sweep(x1,1,apply(x1,1,max),'/')
x2
## [,1] [,2] [,3]
## [1,] 0.656379 0 1.0000000
## [2,] 0.000000 1 0.4172157
l <- list(c(1:10), list(a='a'), c("dog","cat","gator"))
unlist(lapply(l, length))
## [1] 10 1 3
lapply2 <- function(x, f, ...){
out <- vector("list", length(x))
for(i in seq_along(x)){
out[[i]] <- f(x[[i]], ...)
}
out
}
unlist(lapply2(l, length))
## [1] 10 1 3
alist <- list(a = 25, b = 100, c = 64)
lapply(alist, function(x) sqrt(x))
## $a
## [1] 5
##
## $b
## [1] 10
##
## $c
## [1] 8
lapply(seq_along(alist), function(x) sqrt(alist[[x]]))
## [[1]]
## [1] 5
##
## [[2]]
## [1] 10
##
## [[3]]
## [1] 8
lapply(names(alist), function(x) sqrt(alist[[x]]))
## [[1]]
## [1] 5
##
## [[2]]
## [1] 10
##
## [[3]]
## [1] 8
aframe <- data.frame(col1=1:3,col2=4:6)
lapply(aframe, mean) ## as a list
## $col1
## [1] 2
##
## $col2
## [1] 5
apply(aframe, 2, mean) ## as a matrix
## col1 col2
## 2 5
compute_mean <- list(
base = function(x) mean(x),
sum = function(x){
sum(x)/length(x)},
mannual = function(x){
total <- 0
n <- length(x)
for(i in seq_along(x)){
total <- total + x[i]/n
}
total
}
)
set.seed(32611)
x <- runif(1e5)
lapply(compute_mean,function(f) system.time(f(x)))
## $base
## user system elapsed
## 0.001 0.000 0.001
##
## $sum
## user system elapsed
## 0.000 0.000 0.001
##
## $mannual
## user system elapsed
## 0.071 0.004 0.094
aframe <- data.frame(col1=1:3,col2=4:6)
sapply(aframe, sum) ## if not the same atomic type, will coerce to a list
## col1 col2
## 6 15
aframe <- data.frame(col1=1:3,col2=4:6)
vapply(aframe, sum, numeric(1))
## col1 col2
## 6 15
head(state.x77)
## Population Income Illiteracy Life Exp Murder HS Grad Frost
## Alabama 3615 3624 2.1 69.05 15.1 41.3 20
## Alaska 365 6315 1.5 69.31 11.3 66.7 152
## Arizona 2212 4530 1.8 70.55 7.8 58.1 15
## Arkansas 2110 3378 1.9 70.66 10.1 39.9 65
## California 21198 5114 1.1 71.71 10.3 62.6 20
## Colorado 2541 4884 0.7 72.06 6.8 63.9 166
## Area
## Alabama 50708
## Alaska 566432
## Arizona 113417
## Arkansas 51945
## California 156361
## Colorado 103766
head(state.region)
## [1] South West West South West West
## Levels: Northeast South North Central West
aggregate(state.x77, list(Region = state.region), mean)
## Region Population Income Illiteracy Life Exp Murder HS Grad
## 1 Northeast 5495.111 4570.222 1.000000 71.26444 4.722222 53.96667
## 2 South 4208.125 4011.938 1.737500 69.70625 10.581250 44.34375
## 3 North Central 4803.000 4611.083 0.700000 71.76667 5.275000 54.51667
## 4 West 2915.308 4702.615 1.023077 71.23462 7.215385 62.00000
## Frost Area
## 1 132.7778 18141.00
## 2 64.6250 54605.12
## 3 138.8333 62652.00
## 4 102.1538 134463.00
pulse <- round(rnorm(22,70,10/3) + rep(c(0,5), c(10,12)))
groups <- rep(c("A", "B"), c(10, 12))
tapply(pulse, groups, length)
## A B
## 10 12
tapply(pulse, groups, mean)
## A B
## 71.10000 75.83333
tapply2 <- function(x, group, f, ..., simplify = TRUE){
pieces <- split(x, group)
sapply(pieces, f, simplify=simplify)
}
tapply2(pulse, groups, length)
## A B
## 10 12
xs <- replicate(3, runif(4),simplify=FALSE) ## simplify = TRUE (default) will convert a list to matrix whenever possible
ws <- replicate(3, rnorm(4, 1) + 1,simplify=FALSE)
xs
## [[1]]
## [1] 0.98025667 0.61962004 0.09382717 0.94452920
##
## [[2]]
## [1] 0.1334957 0.8816876 0.8900496 0.4542970
##
## [[3]]
## [1] 0.01704906 0.74583446 0.51003283 0.81173743
ws
## [[1]]
## [1] 0.4617377 2.0251183 1.6455966 1.4598447
##
## [[2]]
## [1] 1.0569145 -0.4492433 1.6109724 2.1102390
##
## [[3]]
## [1] 0.1507431 2.5354787 1.5159181 0.6887094
unlist(lapply(seq_along(xs), function(i){
weighted.mean(xs[[i]], ws[[i]])
}))
## [1] 0.5794923 0.4937814 0.6595657
Map(weighted.mean,xs,ws)
## [[1]]
## [1] 0.5794923
##
## [[2]]
## [1] 0.4937814
##
## [[3]]
## [1] 0.6595657
unlist(Map(function(x,w) weighted.mean(x, w, na.rm=TRUE),xs, ws))
## [1] 0.5794923 0.4937814 0.6595657
mapply(weighted.mean,xs,ws)
## [1] 0.5794923 0.4937814 0.6595657
mapply(function(x,y) weighted.mean(x,y), xs,ws)
## [1] 0.5794923 0.4937814 0.6595657
set.seed(32611)
l <- replicate(4, sample(1:10, 15, replace = T), simplify = FALSE)
str(l)
## List of 4
## $ : int [1:15] 5 4 1 6 2 1 9 5 7 6 ...
## $ : int [1:15] 6 7 8 6 10 3 4 9 2 10 ...
## $ : int [1:15] 8 6 8 8 5 5 3 6 9 8 ...
## $ : int [1:15] 5 6 7 2 5 9 4 10 3 8 ...
intersect(intersect(intersect(l[[1]], l[[2]]),
l[[3]]), l[[4]])
## [1] 1 6 2 9 7 3
Reduce(intersect,l)
## [1] 1 6 2 9 7 3
a <- c(1:10)
Reduce('+', a)
## [1] 55
b = as.list(letters[1:10])
Reduce("paste0", b)
## [1] "abcdefghij"
d <- as.list(letters[1:10])
Reduce("c",d)
## [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j"
## Iterative function application:
Funcall <- function(f, ...) f(...)
## Compute log(exp(acos(cos(0))
Reduce(Funcall, list(log, exp, acos, cos), 0, right = TRUE)
## [1] 0
alist = list(a="a", b=1, c=FALSE, d=list(1:10), e="e", f = 2)
Filter(is.character, alist)
## $a
## [1] "a"
##
## $e
## [1] "e"
alist = list(a="a", b=1, c=FALSE, d=list(1:10), e="e", f = 2)
Find(is.character, alist)
## [1] "a"
Find(is.numeric, alist)
## [1] 1
alist = list(a="a", b=1, c=FALSE, d=list(1:10), e="e", f = 2)
Position(is.character, alist)
## [1] 1
Position(is.numeric, alist)
## [1] 2
outer(1:3, 1:5, "*")
## [,1] [,2] [,3] [,4] [,5]
## [1,] 1 2 3 4 5
## [2,] 2 4 6 8 10
## [3,] 3 6 9 12 15
1:3 %o% 1:5
## [,1] [,2] [,3] [,4] [,5]
## [1,] 1 2 3 4 5
## [2,] 2 4 6 8 10
## [3,] 3 6 9 12 15
outer(1:3, 1:5, "paste")
## [,1] [,2] [,3] [,4] [,5]
## [1,] "1 1" "1 2" "1 3" "1 4" "1 5"
## [2,] "2 1" "2 2" "2 3" "2 4" "2 5"
## [3,] "3 1" "3 2" "3 3" "3 4" "3 5"
outer(1:3, 1:5, ">")
## [,1] [,2] [,3] [,4] [,5]
## [1,] FALSE FALSE FALSE FALSE FALSE
## [2,] TRUE FALSE FALSE FALSE FALSE
## [3,] TRUE TRUE FALSE FALSE FALSE
outer(1:3, 1:5, "^")
## [,1] [,2] [,3] [,4] [,5]
## [1,] 1 1 1 1 1
## [2,] 2 4 8 16 32
## [3,] 3 9 27 81 243
combn(4,2) ## all combinations of "choose 2 numbers from 1:4"
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 1 1 1 2 2 3
## [2,] 2 3 4 3 4 4
## However, you cannot do combn(4:5,2:3), how to vectorize this function
combnV <- Vectorize(function(x, m) combn(x, m),
vectorize.args = c("x", "m"))
combnV(4:5,2:3)
## [[1]]
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 1 1 1 2 2 3
## [2,] 2 3 4 3 4 4
##
## [[2]]
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
## [1,] 1 1 1 1 1 1 2 2 2 3
## [2,] 2 2 2 3 3 4 3 3 4 4
## [3,] 3 4 5 4 5 5 4 5 5 5
end
suppressWarnings(library(knitr))
purl("vectorized.rmd", output = "vectorized.R ", documentation = 2)
##
##
## processing file: vectorized.rmd
##
|
| | 0%
|
|. | 1%
|
|. | 2%
|
|.. | 3%
|
|... | 4%
|
|... | 5%
|
|.... | 6%
|
|..... | 7%
|
|..... | 8%
|
|...... | 9%
|
|....... | 10%
|
|....... | 11%
|
|........ | 12%
|
|......... | 14%
|
|......... | 15%
|
|.......... | 16%
|
|........... | 17%
|
|............ | 18%
|
|............ | 19%
|
|............. | 20%
|
|.............. | 21%
|
|.............. | 22%
|
|............... | 23%
|
|................ | 24%
|
|................ | 25%
|
|................. | 26%
|
|.................. | 27%
|
|.................. | 28%
|
|................... | 29%
|
|.................... | 30%
|
|.................... | 31%
|
|..................... | 32%
|
|...................... | 33%
|
|...................... | 34%
|
|....................... | 35%
|
|........................ | 36%
|
|........................ | 38%
|
|......................... | 39%
|
|.......................... | 40%
|
|.......................... | 41%
|
|........................... | 42%
|
|............................ | 43%
|
|............................ | 44%
|
|............................. | 45%
|
|.............................. | 46%
|
|.............................. | 47%
|
|............................... | 48%
|
|................................ | 49%
|
|................................ | 50%
|
|................................. | 51%
|
|.................................. | 52%
|
|................................... | 53%
|
|................................... | 54%
|
|.................................... | 55%
|
|..................................... | 56%
|
|..................................... | 57%
|
|...................................... | 58%
|
|....................................... | 59%
|
|....................................... | 60%
|
|........................................ | 61%
|
|......................................... | 62%
|
|......................................... | 64%
|
|.......................................... | 65%
|
|........................................... | 66%
|
|........................................... | 67%
|
|............................................ | 68%
|
|............................................. | 69%
|
|............................................. | 70%
|
|.............................................. | 71%
|
|............................................... | 72%
|
|............................................... | 73%
|
|................................................ | 74%
|
|................................................. | 75%
|
|................................................. | 76%
|
|.................................................. | 77%
|
|................................................... | 78%
|
|................................................... | 79%
|
|.................................................... | 80%
|
|..................................................... | 81%
|
|..................................................... | 82%
|
|...................................................... | 83%
|
|....................................................... | 84%
|
|........................................................ | 85%
|
|........................................................ | 86%
|
|......................................................... | 88%
|
|.......................................................... | 89%
|
|.......................................................... | 90%
|
|........................................................... | 91%
|
|............................................................ | 92%
|
|............................................................ | 93%
|
|............................................................. | 94%
|
|.............................................................. | 95%
|
|.............................................................. | 96%
|
|............................................................... | 97%
|
|................................................................ | 98%
|
|................................................................ | 99%
|
|.................................................................| 100%
## output file: vectorized.R
## [1] "vectorized.R "