Biostatistical Computing, PHC 6068

Rcpp

Zhiguang Huo (Caleb)

Wednesday September 26, 2018

Why we need C++ in R

Don’t know C++? Not a problem.

Rcpp

Rcpp: for Seamless R and C++ Integration.

Prerequisites:

Warm up example

cppFunction() allows you to write C++ functions in R.

## Getting started with C++
library(Rcpp)
cppFunction('int add(int x, int y) {
  int sum = x + y;
  return sum;
  }')
# add works like a regular R function
add
## function (x, y) 
## .Call(<pointer: 0x115da52a0>, x, y)
add(3,2)
## [1] 5

Note on C++ code

More examples

  1. No inputs, scalar output.
  2. Scalar input, scalar output.
  3. Vector input, scalar output.
  4. Vector input, vector output.
  5. Matrix input, vector output.

No inputs, scalar output (1).

one <- function() 1L
cppFunction('int oneC() {
              return 1;
            }')
one()
## [1] 1
oneC()
## [1] 1

Scalar input, scalar output (2).

signR <- function(x) {
  if (x > 0) {
    1
  } else if (x == 0) {
    0
  } else {
    -1
  }
}
cppFunction('int signC(int x) {
              if (x > 0) {
                return 1;
              } else if (x == 0) {
                return 0;
              } else {
                return -1;
              }
            }')
signR(-10)
## [1] -1
signC(-10)
## [1] -1

Vector input, scalar output (3).

sumR <- function(x) {
  total <- 0
  for (i in seq_along(x)) {
    total <- total + x[i]
  }
  total
}
cppFunction('double sumC(NumericVector x) {
              int n = x.size();
              double total = 0;
              for(int i = 0; i < n; ++i) {
                total += x[i];
              }
              return total;
            }')

Vector input, scalar output (3) - evaluation

x <- runif(1e7)
system.time(sum(x))
##    user  system elapsed 
##   0.011   0.000   0.011
system.time(sumC(x))
##    user  system elapsed 
##   0.017   0.000   0.016
system.time(sumR(x))
##    user  system elapsed 
##   0.622   0.004   0.627

Vector input, vector output (4).

pdistR <- function(x, ys) {
  sqrt((x - ys) ^ 2)
}
cppFunction('NumericVector pdistC(double x, NumericVector ys) {
              int n = ys.size();
              NumericVector out(n);
               
              for(int i = 0; i < n; ++i) {
                out[i] = sqrt(pow(ys[i] - x, 2.0));
              }
              return out;
            }')
pdistR(3,1:5)
## [1] 2 1 0 1 2
pdistC(3,1:5)
## [1] 2 1 0 1 2

Matrix input, vector output (5).

head(rowSums)
##                                                                  
## 1 function (x, na.rm = FALSE, dims = 1L)                         
## 2 {                                                              
## 3     if (is.data.frame(x))                                      
## 4         x <- as.matrix(x)                                      
## 5     if (!is.array(x) || length(dn <- dim(x)) < 2L)             
## 6         stop("'x' must be an array of at least two dimensions")
cppFunction('NumericVector rowSumsC(NumericMatrix x) {
              int nrow = x.nrow(), ncol = x.ncol();
              NumericVector out(nrow);
            
              for (int i = 0; i < nrow; i++) {
                double total = 0;
                for (int j = 0; j < ncol; j++) {
                  total += x(i, j);
                }
                out[i] = total;
              }
              return out;
            }')
set.seed(1014)
x <- matrix(sample(100), 10)
rowSums(x)
##  [1] 458 558 488 458 536 537 488 491 508 528
rowSumsC(x)
##  [1] 458 558 488 458 536 537 488 491 508 528

More note on C++ code

Using sourceCpp

#include <Rcpp.h>
using namespace Rcpp;
// [[Rcpp::export]]

Using sourceCpp

#include <Rcpp.h>
using namespace Rcpp;

// [[Rcpp::export]]
double meanC(NumericVector x) {
  int n = x.size();
  double total = 0;

  for(int i = 0; i < n; ++i) {
    total += x[i];
  }
  return total/n;
}
sourceCpp('meanC.cpp')
meanC(1:100)

list - mean percentage error

#include <Rcpp.h>
using namespace Rcpp;

// [[Rcpp::export]]
double mpe(List mod) {
  if (!mod.inherits("lm")) stop("Input must be a linear model");

  NumericVector resid = as<NumericVector>(mod["residuals"]);
  NumericVector fitted = as<NumericVector>(mod["fitted.values"]);

  int n = resid.size();
  double err = 0;
  for(int i = 0; i < n; ++i) {
    err += resid[i] / (fitted[i] + resid[i]);
  }
  return err / n;
}

list - mean percentage error

sourceCpp('mpe.cpp') 
mod <- lm(mpg ~ wt, data = mtcars)
mpe(mod)

lapply

#include <Rcpp.h>
using namespace Rcpp;

// [[Rcpp::export]]
List lapply1(List input, Function f) {
  int n = input.size();
  List out(n);

  for(int i = 0; i < n; i++) {
    out[i] = f(input[i]);
  }

  return out;
}
sourceCpp('lapply1.cpp')

lapply(1:10,function(x) x^2)

lapply1(1:10,function(x) x^2)

Gibbs sampling example

gibbs_r <- function(N, thin) {
  mat <- matrix(nrow = N, ncol = 2)
  x <- y <- 0

  for (i in 1:N) {
    for (j in 1:thin) {
      x <- rgamma(1, 3, y * y + 4)
      y <- rnorm(1, 1 / (x + 1), 1 / sqrt(2 * (x + 1)))
    }
    mat[i, ] <- c(x, y)
  }
  mat
}

Gibbs sampling evaluation

sourceCpp('gibbs_cpp.cpp')

system.time(gibbs_r(100, 10))
system.time(gibbs_cpp(100, 10))

Using Rcpp in a package (minimum steps)

Also refer to https://caleb-huo.github.io/teaching/2018FALL/lectures/week6_Rpakcages/Rpackages/Rpackage.html for regular steps

  1. devtools::create() create an new R package
  2. devtools::use_rcpp()
  3. Copy your R code with documentation in inside R folder
  4. put cpp file in src
  5. devtools::document() generate R documentation
  6. devtools::build() build the package
  7. devtools::install() install the package

example r file

##' mean calling c
##'
##' mean calling c balalal
##' @title mean calling c
##' @param x a vector
##' @return mean of the vector
##' @author Caleb
##' @export
##' @useDynLib GatorCPP
##' @importFrom Rcpp sourceCpp
##' @examples
##' meanRC(1:10)
meanRC <- function(x){
  meanC(x)
}

example c file

#include <Rcpp.h>
using namespace Rcpp;

// [[Rcpp::export]]
double meanC(NumericVector x) {
  int n = x.size();
  double total = 0;

  for(int i = 0; i < n; ++i) {
    total += x[i];
  }
  return total / n;
}

Wrap up the package

WD <- "~/Desktop"
setwd(WD)
devtools::create("GatorC") ## create the package
setwd(file.path(WD, "GatorC")) ## get into the package
devtools::use_rcpp() ## intialize rcpp
## put in R code in R and cpp code in src
devtools::document() ## generate documentation
devtools::install() ## build the package
## use the package
library(GatorC)
?meanRC
meanRC(1:10)