Biostatistical Computing, PHC 6068

R function

Zhiguang Huo (Caleb)

Wed Sep 2nd, 2020

Why do we need functions

Component of a R function

cubic <- function(x){
  return(x^3)
} 
## x: function argument
## cubic: function name

body(cubic)
## {
##     return(x^3)
## }
formals(cubic)
## $x
environment(cubic)
## <environment: R_GlobalEnv>

Primitive functions

sum
## function (..., na.rm = FALSE)  .Primitive("sum")
body(sum)
## NULL
formals(sum)
## NULL
environment(sum)
## NULL

Example - Odds Ratio Function

Example - Odds Ratio Function

X <- matrix(c(189, 104, 10845, 10933), nrow=2,
            dimnames=list(Treatment=c("Placebo","Aspirin"), 
                          "Myocardial Infarction"=c("Yes", "No")))
X
##          Myocardial Infarction
## Treatment Yes    No
##   Placebo 189 10845
##   Aspirin 104 10933

Returning Objects

odds.ratio0 <- function(X){
  result <- X[1,1]*X[2,2]/(X[1,2]*X[2,1])
  return(result)
}
odds.ratio0(X)
## [1] 1.832054

Returning Objects

odds.ratio1 <- function(X){
  X[1,1]*X[2,2]/(X[1,2]*X[2,1])
}
odds.ratio1(X)
## [1] 1.832054
odds.ratio2 <- function(X) ## if the function has only one line, don't even need brackets
  X[1,1]*X[2,2]/(X[1,2]*X[2,1])

odds.ratio2(X)
## [1] 1.832054

Return multiple variables

odds.ratio <- function(X, conf.level=0.95){
  OR <- X[1,1]*X[2,2]/(X[1,2]*X[2,1])
  logOR.SE <- sqrt(sum(1/X))
  
  alpha <- 1 - conf.level
  CI.lower <- exp(log(OR) - qnorm(1 - alpha/2)*logOR.SE)
  CI.upper <- exp(log(OR) + qnorm(1 - alpha/2)*logOR.SE)
  # qnorm returns the quantiles of a Gaussian distribution.
  
  out <- list(OR=OR, CI=c(CI.lower, CI.upper), conf.level=conf.level)
  return(out)
}
odds.ratio(X)
## $OR
## [1] 1.832054
## 
## $CI
## [1] 1.440042 2.330780
## 
## $conf.level
## [1] 0.95
odds.ratio(X)$OR
## [1] 1.832054

tracking progress of a function

odds.ratio <- function(X, conf.level=0.95){
  cat("calculating odds ratio...","\n")
  OR <- X[1,1]*X[2,2]/(X[1,2]*X[2,1])
  logOR.SE <- sqrt(sum(1/X))
  
  cat("calculating confidence interval...","\n")
  alpha <- 1 - conf.level
  CI.lower <- exp(log(OR) - qnorm(1 - alpha/2)*logOR.SE)
  CI.upper <- exp(log(OR) + qnorm(1 - alpha/2)*logOR.SE)
  
  cat("done, returning output...","\n")
  out <- list(OR=OR, CI=c(CI.lower, CI.upper), conf.level=conf.level)
  return(out)
}
out <- odds.ratio(X)
## calculating odds ratio... 
## calculating confidence interval... 
## done, returning output...

Verifying Arguments

examples for verifying arguments

odds.ratio <- function(X, conf.level=0.95){
  stopifnot(!missing(X), is.matrix(X),dim(X)==c(2,2),X>0)

  cat("calculating odds ratio...","\n")
  OR <- X[1,1]*X[2,2]/(X[1,2]*X[2,1])
  logOR.SE <- sqrt(sum(1/X))
  
  cat("calculating confidence interval...","\n")
  alpha <- 1 - conf.level
  CI.lower <- exp(log(OR) - qnorm(1 - alpha/2)*logOR.SE)
  CI.upper <- exp(log(OR) + qnorm(1 - alpha/2)*logOR.SE)
  
  cat("done, returning output...","\n")
  out <- list(OR=OR, CI=c(CI.lower, CI.upper), conf.level=conf.level)
  return(out)
}
out <- odds.ratio(X)
## calculating odds ratio... 
## calculating confidence interval... 
## done, returning output...

Try catch

tryCatch function allows your code to be excuted even error exists

# result = tryCatch({
#     expr
# }, warning = function(w) {
#     warning-handler-code
# }, error = function(e) {
#     error-handler-code
# }
# )
result = tryCatch({
  2^6
}, warning = function(w) {
  print(paste("MY_WARNING:  ",w))
  return("warning")
}, error = function(e) {
  print(paste("MY_ERROR:  ",e))
  return("error")
}
)
result
## [1] 64
result = tryCatch({
  1:3 + 1:2
}, warning = function(w) {
  print(paste("MY_WARNING:  ",w))
  return("warning")
}, error = function(e) {
  print(paste("MY_ERROR:  ",e))
  return("error")
}
)
## [1] "MY_WARNING:   simpleWarning in 1:3 + 1:2: longer object length is not a multiple of shorter object length\n"
result
## [1] "warning"
result = tryCatch({
  "A" + "B"
}, warning = function(w) {
  print(paste("MY_WARNING:  ",w))
  return("warning")
}, error = function(e) {
  print(paste("MY_ERROR:  ",e))
  return("error")
}
)
## [1] "MY_ERROR:   Error in \"A\" + \"B\": non-numeric argument to binary operator\n"
result
## [1] "error"

Lexical scoping for R functions

Scoping is the set of rules that govern how R looks up the value of a symbol.

Name masking

x <- 1
h <- function(){
  y <- 2
  i <- function(){
    z <- 3
    c(x, y, z)
  }
  i()
}

h()
## [1] 1 2 3

A fresh start

j <- function(){
  if(!exists("a")){
    a <- 1
  } else {
    a <- a + 1
  }
  print(a)
}
j()
## [1] 1
j()
## [1] 1

Dynamic lookup

x <- 15
f <- function() x
f()
## [1] 15
x <- 20
f()
## [1] 20

Every operation is a function call

x <- 10; y <- 5; 
x + y
## [1] 15
'+'(x, y)
## [1] 15
for(i in 1:2) cat(i,' ')
## 1  2
'for'(i, 1:2, cat(i,' '))
## 1  2
x <- 1:10; x[3]
## [1] 3
'['(x,3)
## [1] 3

Other infix functions

## %%: Remainder operator
5 %% 3
## [1] 2
## %/%: Integer division
5 %/% 3
## [1] 1
## %*% Matrix multiplication
## to be discussed in the future.

## %o% Outer product
1:2 %o% 1:3
##      [,1] [,2] [,3]
## [1,]    1    2    3
## [2,]    2    4    6
## %in% Match operator
c(1,2,3,4,5,6) %in% c(2,6,8)
## [1] FALSE  TRUE FALSE FALSE FALSE  TRUE

Function arguments – default value

f <- function(arg, brg=10) 
  list(arg = arg, brg = brg)
str(f(arg=2, brg=10))
## List of 2
##  $ arg: num 2
##  $ brg: num 10
str(f(arg=2)) 
## List of 2
##  $ arg: num 2
##  $ brg: num 10

Function arguments

f <- function(arg, brg=10) 
  list(arg = arg, brg = brg)
str(f(arg=2, brg=10))
## List of 2
##  $ arg: num 2
##  $ brg: num 10
2. then by prefix matching, 
f <- function(arg, brg=10) 
  list(arg = arg, brg = brg)
str(f(b=10, ar=2))
## List of 2
##  $ arg: num 2
##  $ brg: num 10
3. finally by position matching
f <- function(arg, brg=10) 
  list(a = arg, b = brg)
str(f(2, 10))
## List of 2
##  $ a: num 2
##  $ b: num 10

function calling habits

mean(1:10); mean(1:10, trim = 0.05)
## [1] 5.5
## [1] 5.5
mean(x=1:10)
## [1] 5.5
mean(1:10, n = T); mean(1:10, , FALSE); mean(1:10, 0.05); mean(, TRUE, x = c(1:10, NA))
## [1] 5.5
## [1] 5.5
## [1] 5.5
## [1] 5.5

Extra argument: …

myPlot <- function(x,y,...){
  plot(x,y,...)
}
myPlot(1:4,1:4,col=1:4)

calling a function given a list of arguments

do.call(mean, list(1:10, na.rm=TRUE))
## [1] 5.5
mean(1:10, na.rm=TRUE)
## [1] 5.5

R path function

getwd()
## [1] "/Users/zhuo/Dropbox (UFL)/teaching/2020FALL/lectures/Week1_Basic/functions"
WD0 <- getwd()
setwd("~") ## ~ represent user home directly 
getwd()
## [1] "/Users/zhuo"
setwd(WD0)
getwd()
## [1] "/Users/zhuo/Dropbox (UFL)/teaching/2020FALL/lectures/Week1_Basic/functions"

Directory information

dir()
## [1] "functions_files" "functions.html"  "functions.rmd"   "tmp"
dir("~")
##  [1] "Applications"                        
##  [2] "ClueGOConfiguration"                 
##  [3] "correlation_FDR_table.txt"           
##  [4] "CytoscapeConfiguration"              
##  [5] "Data_Correlation.txt"                
##  [6] "Desktop"                             
##  [7] "Documents"                           
##  [8] "Downloads"                           
##  [9] "Dropbox"                             
## [10] "Dropbox (Personal)"                  
## [11] "Dropbox (UFL)"                       
## [12] "GD-2.50"                             
## [13] "GD-2.50.tar.gz"                      
## [14] "httpsanalysis.ingenuity.compa_206442"
## [15] "Library"                             
## [16] "Movies"                              
## [17] "Music"                               
## [18] "perl5"                               
## [19] "Pictures"                            
## [20] "powerFigure_primary.pdf"             
## [21] "Public"                              
## [22] "randomization_list.csv"              
## [23] "VennDiagram_6vs18_6vs24.pdf"         
## [24] "Zotero"
file.exists("result.Rdata")
## [1] FALSE
file.exists("amatrix.csv")
## [1] FALSE
dir.create("tmp")
## Warning in dir.create("tmp"): 'tmp' already exists
dir.create("tmp") # there is a warning message if the folder already exist.
## Warning in dir.create("tmp"): 'tmp' already exists

Time function - Keep track the time of your progress

start <- Sys.time()
sum(1:1e4)
## [1] 50005000
end <- Sys.time()
end - start
## Time difference of 0.001255035 secs
start <- Sys.time()
result <- 0
for(i in 1:1e4){
  result <- result + i
}
result
## [1] 50005000
end <- Sys.time()
end - start
## Time difference of 0.007375002 secs

Lazy Evaluation

f <- function(x, y=x){
  x <- x + 1
  return(y)
}
f(1)
## [1] 2
f <- function(x, y=x){
  force(y) # force to evaluate y.
  x <- x + 1
  return(y)
}
f(1)
## [1] 1

Anonymous functions

function(x=4) g(x) + h(x)
## function(x=4) g(x) + h(x)
formals(function(x=4) g(x) + h(x))
## $x
## [1] 4
body(function(x=4) g(x) + h(x))
## g(x) + h(x)
environment(function(x=4) g(x) + h(x))
## <environment: R_GlobalEnv>

Call functions from another library

library(survival)

## seek for help about the survival package
help(package=survival)

## get help for a specific function
?coxph

## head(coxph)
## args(coxph)
## coxph


detach("package:survival", unload=TRUE) ## remove survival package from R loaded environment

Environments

Basic Environments

e <- new.env()
e$a <- FALSE
e$b <- "a"
e$c <- 2:3
e$d <- 1:3

Basic environments

Generally, an environment is similar to a list, with following important exceptions.

There are four special environments:

Environment search path

search() ## lists all parents of the global environment.
## [1] ".GlobalEnv"        "package:stats"     "package:graphics" 
## [4] "package:grDevices" "package:utils"     "package:datasets" 
## [7] "package:methods"   "Autoloads"         "package:base"
parent.env(globalenv())
## <environment: package:stats>
## attr(,"name")
## [1] "package:stats"
## attr(,"path")
## [1] "/Library/Frameworks/R.framework/Versions/3.6/Resources/library/stats"

Load a new package

library(mclust)
## Package 'mclust' version 5.4.5
## Type 'citation("mclust")' for citing this R package in publications.
search() ## lists all parents of the global environment.
##  [1] ".GlobalEnv"        "package:mclust"    "package:stats"    
##  [4] "package:graphics"  "package:grDevices" "package:utils"    
##  [7] "package:datasets"  "package:methods"   "Autoloads"        
## [10] "package:base"

Load a new package 2

environment(combn)
## <environment: namespace:utils>
library(combinat)
## 
## Attaching package: 'combinat'
## The following object is masked from 'package:utils':
## 
##     combn
search() ## lists all parents of the global environment.
##  [1] ".GlobalEnv"        "package:combinat"  "package:mclust"   
##  [4] "package:stats"     "package:graphics"  "package:grDevices"
##  [7] "package:utils"     "package:datasets"  "package:methods"  
## [10] "Autoloads"         "package:base"
environment(combn)
## <environment: namespace:combinat>
environment(utils::combn)
## <environment: namespace:utils>
environment(combinat::combn)
## <environment: namespace:combinat>

Function utility searching (optional)

x <- matrix(1:4,nrow=2,ncol=2)
nrow
## function (x) 
## dim(x)[1L]
## <bytecode: 0x7fe95df18e70>
## <environment: namespace:base>
nrow(x)
## [1] 2
dim(x)
## [1] 2 2
dim <- function() c(1,1)
## guess what is nrow(x)
nrow(x) ## A function in a package will always use the utilities within its namespace first
## [1] 2
## this is how R prevents external/global functions from affecting utilities functions inside a package
environment(nrow)
## <environment: namespace:base>
environment(dim)
## <environment: R_GlobalEnv>
environment(base::dim)
## NULL

References

Other exercises and readings

  1. Constructive confidence interval for one sided t test, verifying arguments.

  2. Read functional programming example http://adv-r.had.co.nz.