Biostatistical Computing, PHC 6068

R function

Zhiguang Huo (Caleb)

Monday August 28, 2017

Why do we need functions

Component of a R function

cubic <- function(x){
  return(x^3)
} 
## x: function argument
## cubic: function name

body(cubic)
## {
##     return(x^3)
## }
formals(cubic)
## $x
environment(cubic)
## <environment: R_GlobalEnv>

Primitive functions

sum
## function (..., na.rm = FALSE)  .Primitive("sum")
body(sum)
## NULL
formals(sum)
## NULL
environment(sum)
## NULL

Example - Odds Ratio Function

Example - Odds Ratio Function

X <- matrix(c(189, 104, 10845, 10933), nrow=2,
            dimnames=list(Treatment=c("Placebo","Aspirin"), 
                          "Myocardial Infarction"=c("Yes", "No")))
X
##          Myocardial Infarction
## Treatment Yes    No
##   Placebo 189 10845
##   Aspirin 104 10933

Returning Objects

odds.ratio0 <- function(X){
  result <- X[1,1]*X[2,2]/(X[1,2]*X[2,1])
  return(result)
}
odds.ratio0(X)
## [1] 1.832054

Returning Objects

odds.ratio1 <- function(X){
  X[1,1]*X[2,2]/(X[1,2]*X[2,1])
}
odds.ratio1(X)
## [1] 1.832054
odds.ratio2 <- function(X) ## if the function has only one line, don't even need brackets
  X[1,1]*X[2,2]/(X[1,2]*X[2,1])

odds.ratio2(X)
## [1] 1.832054

Returning Objects 2

odds.ratio3 <- function(X){
  result <- X[1,1]*X[2,2]/(X[1,2]*X[2,1])
  invisible(result)
}
odds.ratio3(X)
## here nothing is returned
OR <- odds.ratio3(X)
OR
## [1] 1.832054

Return multiple variables

odds.ratio <- function(X, conf.level=0.95){
  OR <- X[1,1]*X[2,2]/(X[1,2]*X[2,1])
  logOR.SE <- sqrt(sum(1/X))
  
  alpha <- 1 - conf.level
  CI.lower <- exp(log(OR) - qnorm(1 - alpha/2)*logOR.SE)
  CI.upper <- exp(log(OR) + qnorm(1 - alpha/2)*logOR.SE)
  # qnorm returns the quantiles of a Gaussian distribution.
  
  out <- list(OR=OR, CI=c(CI.lower, CI.upper), conf.level=conf.level)
  return(out)
}
odds.ratio(X)
## $OR
## [1] 1.832054
## 
## $CI
## [1] 1.440042 2.330780
## 
## $conf.level
## [1] 0.95
odds.ratio(X)$OR
## [1] 1.832054

tracking progress of a function

odds.ratio <- function(X, conf.level=0.95){
  cat("calculating odds ratio...","\n")
  OR <- X[1,1]*X[2,2]/(X[1,2]*X[2,1])
  logOR.SE <- sqrt(sum(1/X))
  
  cat("calculating confidence interval...","\n")
  alpha <- 1 - conf.level
  CI.lower <- exp(log(OR) - qnorm(1 - alpha/2)*logOR.SE)
  CI.upper <- exp(log(OR) + qnorm(1 - alpha/2)*logOR.SE)
  
  cat("done, returning output...","\n")
  out <- list(OR=OR, CI=c(CI.lower, CI.upper), conf.level=conf.level)
  return(out)
}
out <- odds.ratio(X)
## calculating odds ratio... 
## calculating confidence interval... 
## done, returning output...

Verifying Arguments

examples for verifying arguments

odds.ratio <- function(X, conf.level=0.95){
  stopifnot(!missing(X), is.matrix(X),dim(X)==c(2,2),X>0)

  cat("calculating odds ratio...","\n")
  OR <- X[1,1]*X[2,2]/(X[1,2]*X[2,1])
  logOR.SE <- sqrt(sum(1/X))
  
  cat("calculating confidence interval...","\n")
  alpha <- 1 - conf.level
  CI.lower <- exp(log(OR) - qnorm(1 - alpha/2)*logOR.SE)
  CI.upper <- exp(log(OR) + qnorm(1 - alpha/2)*logOR.SE)
  
  cat("done, returning output...","\n")
  out <- list(OR=OR, CI=c(CI.lower, CI.upper), conf.level=conf.level)
  return(out)
}
out <- odds.ratio(X)
## calculating odds ratio... 
## calculating confidence interval... 
## done, returning output...

Try catch

tryCatch function allows your code to be excuted even error exists

# result = tryCatch({
#     expr
# }, warning = function(w) {
#     warning-handler-code
# }, error = function(e) {
#     error-handler-code
# }
# )
result = tryCatch({
  2^6
}, warning = function(w) {
  print(paste("MY_WARNING:  ",w))
  return("warning")
}, error = function(e) {
  print(paste("MY_ERROR:  ",e))
  return("error")
}
)
result
## [1] 64
result = tryCatch({
  1:3 + 1:2
}, warning = function(w) {
  print(paste("MY_WARNING:  ",w))
  return("warning")
}, error = function(e) {
  print(paste("MY_ERROR:  ",e))
  return("error")
}
)
## [1] "MY_WARNING:   simpleWarning in 1:3 + 1:2: longer object length is not a multiple of shorter object length\n"
result
## [1] "warning"
result = tryCatch({
  "A" + "B"
}, warning = function(w) {
  print(paste("MY_WARNING:  ",w))
  return("warning")
}, error = function(e) {
  print(paste("MY_ERROR:  ",e))
  return("error")
}
)
## [1] "MY_ERROR:   Error in \"A\" + \"B\": non-numeric argument to binary operator\n"
result
## [1] "error"

Lexical scoping for R functions

Scoping is the set of rules that govern how R looks up the value of a symbol.

Name masking

x <- 1
h <- function(){
  y <- 2
  i <- function(){
    z <- 3
    c(x, y, z)
  }
  i()
}

h()
## [1] 1 2 3

A fresh start

j <- function(){
  if(!exists("a")){
    a <- 1
  } else {
    a <- a + 1
  }
  print(a)
}
j()
## [1] 1
j()
## [1] 1

Dynamic lookup

x <- 15
f <- function() x
f()
## [1] 15
x <- 20
f()
## [1] 20

Every operation is a function call

x <- 10; y <- 5; 
x + y
## [1] 15
'+'(x, y)
## [1] 15
for(i in 1:2) cat(i,' ')
## 1  2
'for'(i, 1:2, cat(i,' '))
## 1  2
x <- 1:10; x[3]
## [1] 3
'['(x,3)
## [1] 3

Other infix functions

## %%: Remainder operator
5 %% 3
## [1] 2
## %/%: Integer division
5 %/% 3
## [1] 1
## %*% Matrix multiplication
## to be discussed next week.

## %o% Outer product
1:2 %o% 1:3
##      [,1] [,2] [,3]
## [1,]    1    2    3
## [2,]    2    4    6
## %in% Match operator
c(1,2,3,4,5,6) %in% c(2,6,8)
## [1] FALSE  TRUE FALSE FALSE FALSE  TRUE

User defined infix functions

`%divisible%` <- function(x,y)
{
   if (x%%y ==0) return (TRUE)
   else          return (FALSE)
}
24 %divisible% 5
## [1] FALSE
24 %divisible% 4
## [1] TRUE
`%mypaste%` <- function(x,y)
{
   paste0(x, y)
}
"a" %mypaste% "b"
## [1] "ab"

Function arguments – default value

f <- function(arg, brg=10) 
  list(arg = arg, brg = brg)
str(f(arg=2, brg=10))
## List of 2
##  $ arg: num 2
##  $ brg: num 10
str(f(arg=2)) 
## List of 2
##  $ arg: num 2
##  $ brg: num 10

Function arguments

f <- function(arg, brg=10) 
  list(arg = arg, brg = brg)
str(f(arg=2, brg=10))
## List of 2
##  $ arg: num 2
##  $ brg: num 10
2. then by prefix matching, 
f <- function(arg, brg=10) 
  list(arg = arg, brg = brg)
str(f(b=10, ar=2))
## List of 2
##  $ arg: num 2
##  $ brg: num 10
3. finally by position matching
f <- function(arg, brg=10) 
  list(a = arg, b = brg)
str(f(2, 10))
## List of 2
##  $ a: num 2
##  $ b: num 10

function calling habits

mean(1:10); mean(1:10, trim = 0.05)
## [1] 5.5
## [1] 5.5
mean(x=1:10)
## [1] 5.5
mean(1:10, n = T); mean(1:10, , FALSE); mean(1:10, 0.05); mean(, TRUE, x = c(1:10, NA))
## [1] 5.5
## [1] 5.5
## [1] 5.5
## [1] 5.5

Extra argument: …

myPlot <- function(x,y,...){
  plot(x,y,...)
}
myPlot(1:4,1:4,col=1:4)

calling a function given a list of arguments

do.call(mean, list(1:10, na.rm=TRUE))
## [1] 5.5
mean(1:10, na.rm=TRUE)
## [1] 5.5

R path function

getwd()
## [1] "/Users/zhuo/Dropbox (UFL)/teaching/2017FALL/lectures/week2_Functions/functions"
WD0 <- getwd()
setwd("~") ## ~ represent user home directly 
getwd()
## [1] "/Users/zhuo"
setwd(WD0)
getwd()
## [1] "/Users/zhuo/Dropbox (UFL)/teaching/2017FALL/lectures/week2_Functions/functions"

Directory information

dir()
##  [1] "amatrix.csv"     "amatrix.rdata"   "amatrix.txt"    
##  [4] "amatrix2.txt"    "amatrix3.txt"    "amatrix4.txt"   
##  [7] "functions_files" "functions.html"  "functions.R"    
## [10] "functions.rmd"   "writeLines.txt"
dir("~")
##  [1] "5hmc_examples.pdf"       "Applications"           
##  [3] "Desktop"                 "Documents"              
##  [5] "Downloads"               "Dropbox"                
##  [7] "Dropbox (Personal)"      "Dropbox (UFL)"          
##  [9] "Library"                 "Movies"                 
## [11] "Music"                   "pairwiseCorrelation.pdf"
## [13] "Pictures"                "Public"
file.exists("result.Rdata")
## [1] FALSE
file.exists("amatrix.csv")
## [1] TRUE
dir.create("tmp")
dir.create("tmp") # there is a warning message if the folder already exist.
## Warning in dir.create("tmp"): 'tmp' already exists

File operation – Write

amatrix <- matrix(1:6,3,2)
rownames(amatrix) <- paste0("row", 1:nrow(amatrix))
colnames(amatrix) <- paste0("col", 1:ncol(amatrix))
write.csv(amatrix, file = "amatrix.csv")
write.table(amatrix, file = "amatrix.txt")
write.table(amatrix, file = "amatrix2.txt", row.names = FALSE) ## suppress row names
write.table(amatrix, file = "amatrix3.txt", quote = FALSE) ## suppress quote for strings
write.table(amatrix, file = "amatrix4.txt", row.names = FALSE, quote = FALSE) ## suppress both
amatrix <- matrix(1:6,3,2)
save(amatrix, file='amatrix.rdata')
aline <- "I like Biostatistical computing"
zz <- file("writeLines.txt", "w") ## w for wright
writeLines(aline, con=zz)
close(zz)

read

matrix_csv <- read.csv("amatrix.csv")
matrix_csv
##      X col1 col2
## 1 row1    1    4
## 2 row2    2    5
## 3 row3    3    6
matrix_csv2 <- read.csv("amatrix.csv", row.names = 1)
matrix_csv2
##      col1 col2
## row1    1    4
## row2    2    5
## row3    3    6
matrix_table <- read.table("amatrix4.txt")
matrix_table
##     V1   V2
## 1 col1 col2
## 2    1    4
## 3    2    5
## 4    3    6
matrix_table2 <- read.table("amatrix4.txt", header=TRUE)
matrix_table2
##   col1 col2
## 1    1    4
## 2    2    5
## 3    3    6
load("amatrix.rdata")
amatrix
##      [,1] [,2]
## [1,]    1    4
## [2,]    2    5
## [3,]    3    6
matrix_load <- get(load("amatrix.rdata"))
matrix_load
##      [,1] [,2]
## [1,]    1    4
## [2,]    2    5
## [3,]    3    6
dir()
##  [1] "amatrix.csv"     "amatrix.rdata"   "amatrix.txt"    
##  [4] "amatrix2.txt"    "amatrix3.txt"    "amatrix4.txt"   
##  [7] "functions_files" "functions.html"  "functions.R"    
## [10] "functions.rmd"   "tmp"             "writeLines.txt"
zz <- file("writeLines.txt", "r") ## r for read
readLines(con=zz)
## [1] "I like Biostatistical computing"
close(zz)

reading a text file in R line by line

inputFile <- "amatrix4.txt"
con  <- file(inputFile, open = "r")

while (length(oneLine <- readLines(con, n = 1, warn = FALSE)) > 0) {
    print(oneLine)
  } 
## [1] "col1 col2"
## [1] "1 4"
## [1] "2 5"
## [1] "3 6"
close(con)

Time function - Keep track the time of your progress

start <- Sys.time()
sum(1:1e4)
## [1] 50005000
end <- Sys.time()
end - start
## Time difference of 0.004792929 secs
start <- Sys.time()
result <- 0
for(i in 1:1e4){
  result <- result + i
}
result
## [1] 50005000
end <- Sys.time()
end - start
## Time difference of 0.01290894 secs

Lazy Evaluation

f <- function(x, y=x){
  x <- x + 1
  return(y)
}
f(1)
## [1] 2
f <- function(x, y=x){
  force(y) # force to evaluate y.
  x <- x + 1
  return(y)
}
f(1)
## [1] 1

Anonymous functions

function(x=4) g(x) + h(x)
## function(x=4) g(x) + h(x)
formals(function(x=4) g(x) + h(x))
## $x
## [1] 4
body(function(x=4) g(x) + h(x))
## g(x) + h(x)
environment(function(x=4) g(x) + h(x))
## <environment: R_GlobalEnv>

Call functions from another library

library(survival)

## seek for help about the survival package
help(package=survival)

## get help for a specific function
?coxph

## head(coxph)
## args(coxph)
## coxph


detach("package:survival", unload=TRUE) ## remove survival package from R loaded environment

Environments

Basic Environments

e <- new.env()
e$a <- FALSE
e$b <- "a"
e$c <- 2:3
e$d <- 1:3

Basic environments

Generally, an environment is similar to a list, with following important exceptions.

There are four special environments:

Environment search path

search() ## lists all parents of the global environment.
## [1] ".GlobalEnv"        "package:stats"     "package:graphics" 
## [4] "package:grDevices" "package:utils"     "package:datasets" 
## [7] "package:methods"   "Autoloads"         "package:base"
parent.env(globalenv())
## <environment: package:stats>
## attr(,"name")
## [1] "package:stats"
## attr(,"path")
## [1] "/Library/Frameworks/R.framework/Versions/3.4/Resources/library/stats"

Load a new package

library(mclust)
## Package 'mclust' version 5.3
## Type 'citation("mclust")' for citing this R package in publications.
search() ## lists all parents of the global environment.
##  [1] ".GlobalEnv"        "package:mclust"    "package:stats"    
##  [4] "package:graphics"  "package:grDevices" "package:utils"    
##  [7] "package:datasets"  "package:methods"   "Autoloads"        
## [10] "package:base"

Load a new package 2

environment(combn)
## <environment: namespace:utils>
library(combinat)
## 
## Attaching package: 'combinat'
## The following object is masked from 'package:utils':
## 
##     combn
search() ## lists all parents of the global environment.
##  [1] ".GlobalEnv"        "package:combinat"  "package:mclust"   
##  [4] "package:stats"     "package:graphics"  "package:grDevices"
##  [7] "package:utils"     "package:datasets"  "package:methods"  
## [10] "Autoloads"         "package:base"
environment(combn)
## <environment: namespace:combinat>
environment(utils::combn)
## <environment: namespace:utils>
environment(combinat::combn)
## <environment: namespace:combinat>

An experiment

x <- matrix(1:4,nrow=2,ncol=2)
nrow
## function (x) 
## dim(x)[1L]
## <bytecode: 0x7fc402e9a9c8>
## <environment: namespace:base>
nrow(x)
## [1] 2
dim(x)
## [1] 2 2
dim <- function() c(1,1)
## guess what is nrow(x)
nrow(x) 
## [1] 2
environment(nrow)
## <environment: namespace:base>
environment(dim)
## <environment: R_GlobalEnv>
environment(base::nrow)
## <environment: namespace:base>

Functional programming

R is a functional programming (FP) language. You can do anything with functions that you can do with vectors:

Closures

power <- function(exponent){
  function(x)
    x^exponent
} ## parent level
## child level
square <- power(2)
square(2); 
## [1] 4
cube <- power(3)
cube(2); 
## [1] 8

Top-Down function design

Code sketch

Below is pseudocode for top-down function design

original.problem <- function(many.args){
  sub1.problem.result <- step.sub1(some.args)
  sub2.problem.result <- step.sub2(some.args, sub1.problem.result)
  final.result <- step.sub3(some.args, sub2.problem.result)
  return(final.result)
}

step.sub1 <- function(some.args){
  ## blabla
}

step.sub2 <- function(some.args, sub1.problem.result){
  ## blabla
}

step.sub3 <- function(some.args, sub2.problem.result){
  ## blabla
}

References

suppressWarnings(library(knitr))
purl("functions.rmd", output = "functions.R", documentation = 2)
## 
## 
## processing file: functions.rmd
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |.                                                                |   1%
  |                                                                       
  |.                                                                |   2%
  |                                                                       
  |..                                                               |   2%
  |                                                                       
  |..                                                               |   3%
  |                                                                       
  |...                                                              |   4%
  |                                                                       
  |...                                                              |   5%
  |                                                                       
  |....                                                             |   6%
  |                                                                       
  |.....                                                            |   7%
  |                                                                       
  |.....                                                            |   8%
  |                                                                       
  |......                                                           |   9%
  |                                                                       
  |......                                                           |  10%
  |                                                                       
  |.......                                                          |  10%
  |                                                                       
  |.......                                                          |  11%
  |                                                                       
  |........                                                         |  12%
  |                                                                       
  |........                                                         |  13%
  |                                                                       
  |.........                                                        |  14%
  |                                                                       
  |..........                                                       |  15%
  |                                                                       
  |..........                                                       |  16%
  |                                                                       
  |...........                                                      |  17%
  |                                                                       
  |...........                                                      |  18%
  |                                                                       
  |............                                                     |  18%
  |                                                                       
  |............                                                     |  19%
  |                                                                       
  |.............                                                    |  20%
  |                                                                       
  |..............                                                   |  21%
  |                                                                       
  |..............                                                   |  22%
  |                                                                       
  |...............                                                  |  22%
  |                                                                       
  |...............                                                  |  23%
  |                                                                       
  |................                                                 |  24%
  |                                                                       
  |................                                                 |  25%
  |                                                                       
  |.................                                                |  26%
  |                                                                       
  |..................                                               |  27%
  |                                                                       
  |..................                                               |  28%
  |                                                                       
  |...................                                              |  29%
  |                                                                       
  |...................                                              |  30%
  |                                                                       
  |....................                                             |  30%
  |                                                                       
  |....................                                             |  31%
  |                                                                       
  |.....................                                            |  32%
  |                                                                       
  |.....................                                            |  33%
  |                                                                       
  |......................                                           |  34%
  |                                                                       
  |.......................                                          |  35%
  |                                                                       
  |.......................                                          |  36%
  |                                                                       
  |........................                                         |  37%
  |                                                                       
  |........................                                         |  38%
  |                                                                       
  |.........................                                        |  38%
  |                                                                       
  |.........................                                        |  39%
  |                                                                       
  |..........................                                       |  40%
  |                                                                       
  |...........................                                      |  41%
  |                                                                       
  |...........................                                      |  42%
  |                                                                       
  |............................                                     |  42%
  |                                                                       
  |............................                                     |  43%
  |                                                                       
  |.............................                                    |  44%
  |                                                                       
  |.............................                                    |  45%
  |                                                                       
  |..............................                                   |  46%
  |                                                                       
  |...............................                                  |  47%
  |                                                                       
  |...............................                                  |  48%
  |                                                                       
  |................................                                 |  49%
  |                                                                       
  |................................                                 |  50%
  |                                                                       
  |.................................                                |  50%
  |                                                                       
  |.................................                                |  51%
  |                                                                       
  |..................................                               |  52%
  |                                                                       
  |..................................                               |  53%
  |                                                                       
  |...................................                              |  54%
  |                                                                       
  |....................................                             |  55%
  |                                                                       
  |....................................                             |  56%
  |                                                                       
  |.....................................                            |  57%
  |                                                                       
  |.....................................                            |  58%
  |                                                                       
  |......................................                           |  58%
  |                                                                       
  |......................................                           |  59%
  |                                                                       
  |.......................................                          |  60%
  |                                                                       
  |........................................                         |  61%
  |                                                                       
  |........................................                         |  62%
  |                                                                       
  |.........................................                        |  62%
  |                                                                       
  |.........................................                        |  63%
  |                                                                       
  |..........................................                       |  64%
  |                                                                       
  |..........................................                       |  65%
  |                                                                       
  |...........................................                      |  66%
  |                                                                       
  |............................................                     |  67%
  |                                                                       
  |............................................                     |  68%
  |                                                                       
  |.............................................                    |  69%
  |                                                                       
  |.............................................                    |  70%
  |                                                                       
  |..............................................                   |  70%
  |                                                                       
  |..............................................                   |  71%
  |                                                                       
  |...............................................                  |  72%
  |                                                                       
  |...............................................                  |  73%
  |                                                                       
  |................................................                 |  74%
  |                                                                       
  |.................................................                |  75%
  |                                                                       
  |.................................................                |  76%
  |                                                                       
  |..................................................               |  77%
  |                                                                       
  |..................................................               |  78%
  |                                                                       
  |...................................................              |  78%
  |                                                                       
  |...................................................              |  79%
  |                                                                       
  |....................................................             |  80%
  |                                                                       
  |.....................................................            |  81%
  |                                                                       
  |.....................................................            |  82%
  |                                                                       
  |......................................................           |  82%
  |                                                                       
  |......................................................           |  83%
  |                                                                       
  |.......................................................          |  84%
  |                                                                       
  |.......................................................          |  85%
  |                                                                       
  |........................................................         |  86%
  |                                                                       
  |.........................................................        |  87%
  |                                                                       
  |.........................................................        |  88%
  |                                                                       
  |..........................................................       |  89%
  |                                                                       
  |..........................................................       |  90%
  |                                                                       
  |...........................................................      |  90%
  |                                                                       
  |...........................................................      |  91%
  |                                                                       
  |............................................................     |  92%
  |                                                                       
  |............................................................     |  93%
  |                                                                       
  |.............................................................    |  94%
  |                                                                       
  |..............................................................   |  95%
  |                                                                       
  |..............................................................   |  96%
  |                                                                       
  |...............................................................  |  97%
  |                                                                       
  |...............................................................  |  98%
  |                                                                       
  |................................................................ |  98%
  |                                                                       
  |................................................................ |  99%
  |                                                                       
  |.................................................................| 100%
## output file: functions.R
## [1] "functions.R"

In class exercises

  1. Constructive confidence interval for one sided t test, verifying arguments.

  2. Read in a file, print the some element of each line.

  3. Read functional programming example http://adv-r.had.co.nz.

Confidence interval for one sample t.test

Suppose you have a vector of numbers \(X\) from Gaussian distribution. Please write a function to construct \(\alpha\) level confidence interval for the mean estimate of \(X\), mimicing the ratio.ratio function. one sample t.test reference

n <- 20
x <- rnorm(n, 0, 1) ## generate n=20 samples from N(0,1)

## yourFunction(x, alpha = 0.05)

Read in data

newVec <- NULL

Functional programming

Why functional programming is useful