Biostatistical Computing, PHC 6068

Vectorized calculation

Zhiguang Huo (Caleb)

Monday September 6, 2017

Why do we need vectorized calculation (motivating example 1)

a <- 1:1000000
### version A, loop
start <- Sys.time()
meanA <- 0
for(i in seq_along(a)){
  meanA <- meanA + a[i]/length(a)
}
end <- Sys.time() 
end - start
## Time difference of 1.313023 secs
meanA
## [1] 500000.5
### version B, vectorized calculation
start <- Sys.time()
mean(a)
## [1] 500000.5
end <- Sys.time() 
end - start
## Time difference of 0.005419016 secs

Why do we need vectorized calculation (motivating example 2)

a <- 1:1000000
b <- 1000000:1
### version A, loop
start <- Sys.time()
result <- numeric(length(a)) ## create a vector with length length(a) and all elements 0
for(i in seq_along(a)){
  result[i] <- a[i] + b[i]
}
end <- Sys.time() 
end - start
## Time difference of 2.219551 secs
### version B, vectorized calculation
start <- Sys.time()
result <- a + b
end <- Sys.time() 
end - start
## Time difference of 0.01295304 secs

Simple examples of vectorized calculation

a <- c(1.1,3.4,9.5)
b <- c(9,2,0.8)
a + b ## vector addition
## [1] 10.1  5.4 10.3
a * b ## vector multiplication
## [1] 9.9 6.8 7.6
a ^ b ## 1.1^9, 3.4^2, 9.5^0.8
## [1]  2.357948 11.560000  6.055903

Simple examples of vectorized calculation 2

a <- 1:8
a + 2
## [1]  3  4  5  6  7  8  9 10
a + c(0,1)
## [1] 1 3 3 5 5 7 7 9
a + c(1,2,3) ## warming message 
## Warning in a + c(1, 2, 3): longer object length is not a multiple of
## shorter object length
## [1]  2  4  6  5  7  9  8 10

Other scientific calculation

a <- seq(1,3,1)
sin(a)
## [1] 0.8414710 0.9092974 0.1411200
tan(a)
## [1]  1.5574077 -2.1850399 -0.1425465
log(a,base = 10)
## [1] 0.0000000 0.3010300 0.4771213
log10(a)
## [1] 0.0000000 0.3010300 0.4771213
exp(a)
## [1]  2.718282  7.389056 20.085537

vectorized input for plot

plot(x = 1:10, y = 1:10, col=1:10)

plot(x = 1:10, y = 1:10, col=1)

Initialize a vector

n <- 5
a_int <- integer(n)
a_int
## [1] 0 0 0 0 0
b_int <- rep(0, n)
b_int
## [1] 0 0 0 0 0
c_int <- replicate(5,0)
c_int
## [1] 0 0 0 0 0

Initialize a vector, other types

a_double <- double(n)
a_double
## [1] 0 0 0 0 0
a_char <- character(n)
a_char
## [1] "" "" "" "" ""
a_logical <- logical(n)
a_logical
## [1] FALSE FALSE FALSE FALSE FALSE

A sequence

a <- seq(from=10, to=100, by=2)
a
##  [1]  10  12  14  16  18  20  22  24  26  28  30  32  34  36  38  40  42
## [18]  44  46  48  50  52  54  56  58  60  62  64  66  68  70  72  74  76
## [35]  78  80  82  84  86  88  90  92  94  96  98 100
b <- seq(from=10, to=100, length=10)
b
##  [1]  10  20  30  40  50  60  70  80  90 100
c <- seq(from=10, to=100, along.with=1:10)
c
##  [1]  10  20  30  40  50  60  70  80  90 100

Random number generator

set.seed(32611) ## set a seed number such that the random numbers will keep the same
rnorm(n = 5, mean = 0, sd = 1)
## [1] -0.1023726 -1.8869039 -1.0586563  1.0227599  0.3982516
set.seed(32611)
rnorm(n = 5, mean = 0:4, sd = 1)
## [1] -0.1023726 -0.8869039  0.9413437  4.0227599  4.3982516
set.seed(32611)
sample(1:10,1)
## [1] 5
set.seed(32611)
runif(n = 4)
## [1] 0.45923048 0.37486058 0.02958663 0.51775341

replicate

replicate(4,list())
## [[1]]
## list()
## 
## [[2]]
## list()
## 
## [[3]]
## list()
## 
## [[4]]
## list()
replicate(5, runif(sample(1:5,1)))
## [[1]]
## [1] 0.02609954
## 
## [[2]]
## [1] 0.4031372 0.6547776 0.5071504 0.2504906 0.9807351
## 
## [[3]]
## [1] 0.8903292 0.5409666 0.5041831
## 
## [[4]]
## [1] 0.7552580 0.5850660 0.9150680 0.2715686
## 
## [[5]]
## [1] 0.8858161 0.1869268

Matrix calculation

a <- matrix(1:6,nrow=3,ncol=2)
a ## matrix will be filled by column by default. 
##      [,1] [,2]
## [1,]    1    4
## [2,]    2    5
## [3,]    3    6
b <- matrix(6:1, nrow=3, ncol=2)
a * b ## similar to vector, matrix algebra will be done element-wise.
##      [,1] [,2]
## [1,]    6   12
## [2,]   10   10
## [3,]   12    6
a + c(1,2,3) ## if a matrix add a vector, add by column
##      [,1] [,2]
## [1,]    2    5
## [2,]    4    7
## [3,]    6    9

apply function: works on margins of a matrix (1)

a <- matrix(1:6,nrow=3,ncol=2)
a
##      [,1] [,2]
## [1,]    1    4
## [2,]    2    5
## [3,]    3    6
apply(a,1,sum) ## for each row, equivalent to rowSums
## [1] 5 7 9
apply(a,1,var) ## for each row, calculate the variance
## [1] 4.5 4.5 4.5
apply(a,1,function(x) x^2) ## can also use an anonymous function, note that R will always fill the result by column.
##      [,1] [,2] [,3]
## [1,]    1    4    9
## [2,]   16   25   36

apply function: works on margins of a matrix (2)

apply(a,2,sum) ## for each column, equivalent to colSums
## [1]  6 15
colSums(a)
## [1]  6 15
apply(a,2,mean) ## for each column, equivalent to colMeans
## [1] 2 5
apply(a,2,function(x) min(x)) ## equivalent to apply(a,2,min)
## [1] 1 4
apply(a,2,min)
## [1] 1 4

Sweep

set.seed(32611)
(x <- matrix(rnorm(6,0,4),nrow=2))
##            [,1]      [,2]      [,3]
## [1,] -0.4094903 -4.234625  1.593006
## [2,] -7.5476157  4.091040 -2.691786
x1 <- sweep(x,1,apply(x,1,min),'-')
x1
##          [,1]     [,2]     [,3]
## [1,] 3.825135  0.00000 5.827631
## [2,] 0.000000 11.63866 4.855829
x2 <- sweep(x1,1,apply(x1,1,max),'/')
x2
##          [,1] [,2]      [,3]
## [1,] 0.656379    0 1.0000000
## [2,] 0.000000    1 0.4172157

lapply()

l <- list(c(1:10), list(a='a'), c("dog","cat","gator"))
unlist(lapply(l, length))
## [1] 10  1  3
lapply2 <- function(x, f, ...){
  out <- vector("list", length(x))
  for(i in seq_along(x)){
    out[[i]] <- f(x[[i]], ...)
  }
  out
}
unlist(lapply2(l, length))
## [1] 10  1  3

lapply usages

alist <- list(a = 25, b = 100, c = 64)
lapply(alist, function(x) sqrt(x))
## $a
## [1] 5
## 
## $b
## [1] 10
## 
## $c
## [1] 8
lapply(seq_along(alist), function(x) sqrt(alist[[x]]))
## [[1]]
## [1] 5
## 
## [[2]]
## [1] 10
## 
## [[3]]
## [1] 8
lapply(names(alist), function(x) sqrt(alist[[x]]))
## [[1]]
## [1] 5
## 
## [[2]]
## [1] 10
## 
## [[3]]
## [1] 8

lapply on data.frame

aframe <- data.frame(col1=1:3,col2=4:6)
lapply(aframe, mean) ## as a list
## $col1
## [1] 2
## 
## $col2
## [1] 5
apply(aframe, 2, mean) ## as a matrix
## col1 col2 
##    2    5

lapply on a list of functions

compute_mean <- list(
  base = function(x) mean(x),
  sum = function(x){
    sum(x)/length(x)},
  mannual = function(x){
    total <- 0
    n <- length(x)
    for(i in seq_along(x)){
      total <- total + x[i]/n
    }
    total
  }
)

set.seed(32611)
x <- runif(1e5)
lapply(compute_mean,function(f) system.time(f(x)))
## $base
##    user  system elapsed 
##   0.001   0.000   0.001 
## 
## $sum
##    user  system elapsed 
##   0.000   0.000   0.001 
## 
## $mannual
##    user  system elapsed 
##   0.071   0.004   0.094

sapply() and vapply()

aframe <- data.frame(col1=1:3,col2=4:6)
sapply(aframe, sum) ## if not the same atomic type, will coerce to a list
## col1 col2 
##    6   15
aframe <- data.frame(col1=1:3,col2=4:6)
vapply(aframe, sum, numeric(1)) 
## col1 col2 
##    6   15

aggregate

head(state.x77)
##            Population Income Illiteracy Life Exp Murder HS Grad Frost
## Alabama          3615   3624        2.1    69.05   15.1    41.3    20
## Alaska            365   6315        1.5    69.31   11.3    66.7   152
## Arizona          2212   4530        1.8    70.55    7.8    58.1    15
## Arkansas         2110   3378        1.9    70.66   10.1    39.9    65
## California      21198   5114        1.1    71.71   10.3    62.6    20
## Colorado         2541   4884        0.7    72.06    6.8    63.9   166
##              Area
## Alabama     50708
## Alaska     566432
## Arizona    113417
## Arkansas    51945
## California 156361
## Colorado   103766
head(state.region)
## [1] South West  West  South West  West 
## Levels: Northeast South North Central West
aggregate(state.x77, list(Region = state.region), mean)
##          Region Population   Income Illiteracy Life Exp    Murder  HS Grad
## 1     Northeast   5495.111 4570.222   1.000000 71.26444  4.722222 53.96667
## 2         South   4208.125 4011.938   1.737500 69.70625 10.581250 44.34375
## 3 North Central   4803.000 4611.083   0.700000 71.76667  5.275000 54.51667
## 4          West   2915.308 4702.615   1.023077 71.23462  7.215385 62.00000
##      Frost      Area
## 1 132.7778  18141.00
## 2  64.6250  54605.12
## 3 138.8333  62652.00
## 4 102.1538 134463.00

tapply

pulse <- round(rnorm(22,70,10/3) + rep(c(0,5), c(10,12)))
groups <- rep(c("A", "B"), c(10, 12))
tapply(pulse, groups, length)
##  A  B 
## 10 12
tapply(pulse, groups, mean)
##        A        B 
## 71.10000 75.83333
tapply2 <- function(x, group, f, ..., simplify = TRUE){
  pieces <- split(x, group)
  sapply(pieces, f, simplify=simplify)
}
tapply2(pulse, groups, length)
##  A  B 
## 10 12

Multiple inputs (Map)

xs <- replicate(3, runif(4),simplify=FALSE) ## simplify = TRUE (default) will convert a list to matrix whenever possible
ws <- replicate(3, rnorm(4, 1) + 1,simplify=FALSE)
xs
## [[1]]
## [1] 0.98025667 0.61962004 0.09382717 0.94452920
## 
## [[2]]
## [1] 0.1334957 0.8816876 0.8900496 0.4542970
## 
## [[3]]
## [1] 0.01704906 0.74583446 0.51003283 0.81173743
ws
## [[1]]
## [1] 0.4617377 2.0251183 1.6455966 1.4598447
## 
## [[2]]
## [1]  1.0569145 -0.4492433  1.6109724  2.1102390
## 
## [[3]]
## [1] 0.1507431 2.5354787 1.5159181 0.6887094
unlist(lapply(seq_along(xs), function(i){
  weighted.mean(xs[[i]], ws[[i]])
}))
## [1] 0.5794923 0.4937814 0.6595657
Map(weighted.mean,xs,ws)
## [[1]]
## [1] 0.5794923
## 
## [[2]]
## [1] 0.4937814
## 
## [[3]]
## [1] 0.6595657
unlist(Map(function(x,w) weighted.mean(x, w, na.rm=TRUE),xs, ws))
## [1] 0.5794923 0.4937814 0.6595657

approach 3

mapply(weighted.mean,xs,ws)
## [1] 0.5794923 0.4937814 0.6595657
mapply(function(x,y) weighted.mean(x,y), xs,ws)
## [1] 0.5794923 0.4937814 0.6595657

Common Higher-Order Functions in Functional Programming Languages

Reduce

set.seed(32611)
l <- replicate(4, sample(1:10, 15, replace = T), simplify = FALSE)
str(l)
## List of 4
##  $ : int [1:15] 5 4 1 6 2 1 9 5 7 6 ...
##  $ : int [1:15] 6 7 8 6 10 3 4 9 2 10 ...
##  $ : int [1:15] 8 6 8 8 5 5 3 6 9 8 ...
##  $ : int [1:15] 5 6 7 2 5 9 4 10 3 8 ...
intersect(intersect(intersect(l[[1]], l[[2]]),
  l[[3]]), l[[4]])
## [1] 1 6 2 9 7 3
Reduce(intersect,l)
## [1] 1 6 2 9 7 3

Reduce 2

a <- c(1:10)
Reduce('+', a)
## [1] 55
b = as.list(letters[1:10])
Reduce("paste0", b)
## [1] "abcdefghij"
d <- as.list(letters[1:10])
Reduce("c",d)
##  [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j"

Reduce 3

## Iterative function application:
Funcall <- function(f, ...) f(...)
## Compute log(exp(acos(cos(0))
Reduce(Funcall, list(log, exp, acos, cos), 0, right = TRUE)
## [1] 0

Filter

alist = list(a="a", b=1, c=FALSE, d=list(1:10), e="e", f = 2)
Filter(is.character, alist)
## $a
## [1] "a"
## 
## $e
## [1] "e"

Find

alist = list(a="a", b=1, c=FALSE, d=list(1:10), e="e", f = 2)
Find(is.character, alist)
## [1] "a"
Find(is.numeric, alist)
## [1] 1

Position

alist = list(a="a", b=1, c=FALSE, d=list(1:10), e="e", f = 2)
Position(is.character, alist)
## [1] 1
Position(is.numeric, alist)
## [1] 2

Outer

outer(1:3, 1:5, "*")
##      [,1] [,2] [,3] [,4] [,5]
## [1,]    1    2    3    4    5
## [2,]    2    4    6    8   10
## [3,]    3    6    9   12   15
1:3 %o% 1:5
##      [,1] [,2] [,3] [,4] [,5]
## [1,]    1    2    3    4    5
## [2,]    2    4    6    8   10
## [3,]    3    6    9   12   15

Outer 2

outer(1:3, 1:5, "paste")
##      [,1]  [,2]  [,3]  [,4]  [,5] 
## [1,] "1 1" "1 2" "1 3" "1 4" "1 5"
## [2,] "2 1" "2 2" "2 3" "2 4" "2 5"
## [3,] "3 1" "3 2" "3 3" "3 4" "3 5"
outer(1:3, 1:5, ">")
##       [,1]  [,2]  [,3]  [,4]  [,5]
## [1,] FALSE FALSE FALSE FALSE FALSE
## [2,]  TRUE FALSE FALSE FALSE FALSE
## [3,]  TRUE  TRUE FALSE FALSE FALSE
outer(1:3, 1:5, "^")
##      [,1] [,2] [,3] [,4] [,5]
## [1,]    1    1    1    1    1
## [2,]    2    4    8   16   32
## [3,]    3    9   27   81  243

Vectorize a function

combn(4,2) ## all combinations of "choose 2 numbers from 1:4"
##      [,1] [,2] [,3] [,4] [,5] [,6]
## [1,]    1    1    1    2    2    3
## [2,]    2    3    4    3    4    4
## However, you cannot do combn(4:5,2:3), how to vectorize this function

combnV <- Vectorize(function(x, m) combn(x, m),
                    vectorize.args = c("x", "m"))

combnV(4:5,2:3)
## [[1]]
##      [,1] [,2] [,3] [,4] [,5] [,6]
## [1,]    1    1    1    2    2    3
## [2,]    2    3    4    3    4    4
## 
## [[2]]
##      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
## [1,]    1    1    1    1    1    1    2    2    2     3
## [2,]    2    2    2    3    3    4    3    3    4     4
## [3,]    3    4    5    4    5    5    4    5    5     5

Balance between efficiency and simplicity of your code

end

suppressWarnings(library(knitr))
purl("vectorized.rmd", output = "vectorized.R ", documentation = 2)
## 
## 
## processing file: vectorized.rmd
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |.                                                                |   1%
  |                                                                       
  |.                                                                |   2%
  |                                                                       
  |..                                                               |   3%
  |                                                                       
  |...                                                              |   4%
  |                                                                       
  |...                                                              |   5%
  |                                                                       
  |....                                                             |   6%
  |                                                                       
  |.....                                                            |   7%
  |                                                                       
  |.....                                                            |   8%
  |                                                                       
  |......                                                           |   9%
  |                                                                       
  |.......                                                          |  10%
  |                                                                       
  |.......                                                          |  11%
  |                                                                       
  |........                                                         |  12%
  |                                                                       
  |.........                                                        |  14%
  |                                                                       
  |.........                                                        |  15%
  |                                                                       
  |..........                                                       |  16%
  |                                                                       
  |...........                                                      |  17%
  |                                                                       
  |............                                                     |  18%
  |                                                                       
  |............                                                     |  19%
  |                                                                       
  |.............                                                    |  20%
  |                                                                       
  |..............                                                   |  21%
  |                                                                       
  |..............                                                   |  22%
  |                                                                       
  |...............                                                  |  23%
  |                                                                       
  |................                                                 |  24%
  |                                                                       
  |................                                                 |  25%
  |                                                                       
  |.................                                                |  26%
  |                                                                       
  |..................                                               |  27%
  |                                                                       
  |..................                                               |  28%
  |                                                                       
  |...................                                              |  29%
  |                                                                       
  |....................                                             |  30%
  |                                                                       
  |....................                                             |  31%
  |                                                                       
  |.....................                                            |  32%
  |                                                                       
  |......................                                           |  33%
  |                                                                       
  |......................                                           |  34%
  |                                                                       
  |.......................                                          |  35%
  |                                                                       
  |........................                                         |  36%
  |                                                                       
  |........................                                         |  38%
  |                                                                       
  |.........................                                        |  39%
  |                                                                       
  |..........................                                       |  40%
  |                                                                       
  |..........................                                       |  41%
  |                                                                       
  |...........................                                      |  42%
  |                                                                       
  |............................                                     |  43%
  |                                                                       
  |............................                                     |  44%
  |                                                                       
  |.............................                                    |  45%
  |                                                                       
  |..............................                                   |  46%
  |                                                                       
  |..............................                                   |  47%
  |                                                                       
  |...............................                                  |  48%
  |                                                                       
  |................................                                 |  49%
  |                                                                       
  |................................                                 |  50%
  |                                                                       
  |.................................                                |  51%
  |                                                                       
  |..................................                               |  52%
  |                                                                       
  |...................................                              |  53%
  |                                                                       
  |...................................                              |  54%
  |                                                                       
  |....................................                             |  55%
  |                                                                       
  |.....................................                            |  56%
  |                                                                       
  |.....................................                            |  57%
  |                                                                       
  |......................................                           |  58%
  |                                                                       
  |.......................................                          |  59%
  |                                                                       
  |.......................................                          |  60%
  |                                                                       
  |........................................                         |  61%
  |                                                                       
  |.........................................                        |  62%
  |                                                                       
  |.........................................                        |  64%
  |                                                                       
  |..........................................                       |  65%
  |                                                                       
  |...........................................                      |  66%
  |                                                                       
  |...........................................                      |  67%
  |                                                                       
  |............................................                     |  68%
  |                                                                       
  |.............................................                    |  69%
  |                                                                       
  |.............................................                    |  70%
  |                                                                       
  |..............................................                   |  71%
  |                                                                       
  |...............................................                  |  72%
  |                                                                       
  |...............................................                  |  73%
  |                                                                       
  |................................................                 |  74%
  |                                                                       
  |.................................................                |  75%
  |                                                                       
  |.................................................                |  76%
  |                                                                       
  |..................................................               |  77%
  |                                                                       
  |...................................................              |  78%
  |                                                                       
  |...................................................              |  79%
  |                                                                       
  |....................................................             |  80%
  |                                                                       
  |.....................................................            |  81%
  |                                                                       
  |.....................................................            |  82%
  |                                                                       
  |......................................................           |  83%
  |                                                                       
  |.......................................................          |  84%
  |                                                                       
  |........................................................         |  85%
  |                                                                       
  |........................................................         |  86%
  |                                                                       
  |.........................................................        |  88%
  |                                                                       
  |..........................................................       |  89%
  |                                                                       
  |..........................................................       |  90%
  |                                                                       
  |...........................................................      |  91%
  |                                                                       
  |............................................................     |  92%
  |                                                                       
  |............................................................     |  93%
  |                                                                       
  |.............................................................    |  94%
  |                                                                       
  |..............................................................   |  95%
  |                                                                       
  |..............................................................   |  96%
  |                                                                       
  |...............................................................  |  97%
  |                                                                       
  |................................................................ |  98%
  |                                                                       
  |................................................................ |  99%
  |                                                                       
  |.................................................................| 100%
## output file: vectorized.R
## [1] "vectorized.R "