Programming basics for Biostatistics 6099

Rcpp

Zhiguang Huo (Caleb)

Tuesday Oct 3rd, 2023

Why we need C++ in R

Don’t know C++? Not a problem.

Rcpp

Rcpp: for Seamless R and C++ Integration.

Rcpp Prerequisites:

Warm up example

cppFunction() allows you to write C++ functions in R.

## Getting started with C++
library(Rcpp)
cppFunction('int add(int x, int y) {
  int sum = x + y;
  return sum;
  }')
# add works like a regular R function
add
## function (x, y) 
## .Call(<pointer: 0x106cc0cc0>, x, y)
add(3,2)
## [1] 5

Note on C++ code

More examples

  1. No inputs, scalar output.
  2. Scalar input, scalar output.
  3. Vector input, scalar output.
  4. Vector input, vector output.
  5. Matrix input, vector output.

No inputs, scalar output (1).

one <- function() 1L
cppFunction('int oneC() {
              return 1;
            }')
one()
## [1] 1
oneC()
## [1] 1

Scalar input, scalar output (2).

signR <- function(x) {
  if (x > 0) {
    1
  } else if (x == 0) {
    0
  } else {
    -1
  }
}
cppFunction('int signC(int x) {
              if (x > 0) {
                return 1;
              } else if (x == 0) {
                return 0;
              } else {
                return -1;
              }
            }')
signR(-10)
## [1] -1
signC(-10)
## [1] -1

Vector input, scalar output (3).

sumR <- function(x) {
  total <- 0
  for (i in seq_along(x)) {
    total <- total + x[i]
  }
  total
}
cppFunction('double sumC(NumericVector x) {
              int n = x.size();
              double total = 0;
              for(int i = 0; i < n; ++i) {
                total += x[i];
              }
              return total;
            }')

Vector input, scalar output (3) - evaluation

x <- runif(1e7)
system.time(sum(x))
##    user  system elapsed 
##   0.014   0.000   0.014
system.time(sumC(x))
##    user  system elapsed 
##   0.009   0.000   0.010
system.time(sumR(x))
##    user  system elapsed 
##   0.140   0.002   0.142

Vector input, vector output (4).

pdistR <- function(x, ys) {
  sqrt((x - ys) ^ 2)
}
cppFunction('NumericVector pdistC(double x, NumericVector ys) {
              int n = ys.size();
              NumericVector out(n);
               
              for(int i = 0; i < n; ++i) {
                out[i] = sqrt(pow(ys[i] - x, 2.0));
              }
              return out;
            }')
pdistR(3,1:5)
## [1] 2 1 0 1 2
pdistC(3,1:5)
## [1] 2 1 0 1 2

Matrix input, vector output (5).

rowSums
cppFunction('NumericVector rowSumsC(NumericMatrix x) {
              int nrow = x.nrow(), ncol = x.ncol();
              NumericVector out(nrow);
            
              for (int i = 0; i < nrow; i++) {
                double total = 0;
                for (int j = 0; j < ncol; j++) {
                  total += x(i, j);
                }
                out[i] = total;
              }
              return out;
            }')
set.seed(1014)
x <- matrix(sample(100), 10)
rowSums(x)
##  [1] 446 514 480 514 352 627 525 586 572 434
rowSumsC(x)
##  [1] 446 514 480 514 352 627 525 586 572 434

More note on C++ code

Using sourceCpp

#include <Rcpp.h>
using namespace Rcpp;
// [[Rcpp::export]]

Using sourceCpp

#include <Rcpp.h>
using namespace Rcpp;

// [[Rcpp::export]]
double meanC(NumericVector x) {
  int n = x.size();
  double total = 0;

  for(int i = 0; i < n; ++i) {
    total += x[i];
  }
  return total/n;
}
sourceCpp('meanC.cpp')
meanC(1:100)

list - mean percentage error

#include <Rcpp.h>
using namespace Rcpp;

// [[Rcpp::export]]
double mpe(List mod) {
  if (!mod.inherits("lm")) stop("Input must be a linear model");

  NumericVector resid = as<NumericVector>(mod["residuals"]);
  NumericVector fitted = as<NumericVector>(mod["fitted.values"]);

  int n = resid.size();
  double err = 0;
  for(int i = 0; i < n; ++i) {
    err += resid[i] / (fitted[i] + resid[i]);
  }
  return err / n;
}

list - mean percentage error

sourceCpp('mpe.cpp') 
mod <- lm(mpg ~ wt, data = mtcars)
mpe(mod)

lapply

#include <Rcpp.h>
using namespace Rcpp;

// [[Rcpp::export]]
List lapply1(List input, Function f) {
  int n = input.size();
  List out(n);

  for(int i = 0; i < n; i++) {
    out[i] = f(input[i]);
  }

  return out;
}
sourceCpp('lapply1.cpp')

lapply(1:10,function(x) x^2)

lapply1(1:10,function(x) x^2)

Gibbs sampling example

gibbs_r <- function(N, thin) {
  mat <- matrix(nrow = N, ncol = 2)
  x <- y <- 0

  for (i in 1:N) {
    for (j in 1:thin) {
      x <- rgamma(1, 3, y * y + 4)
      y <- rnorm(1, 1 / (x + 1), 1 / sqrt(2 * (x + 1)))
    }
    mat[i, ] <- c(x, y)
  }
  mat
}

Gibbs sampling evaluation

sourceCpp('gibbs_cpp.cpp')

system.time(gibbs_r(100, 10))
system.time(gibbs_cpp(100, 10))

Using Rcpp in a package (minimum steps)

Also refer to https://caleb-huo.github.io/teaching/2023FALL/lectures/Week7_RPackage/Rpackage.html for regular steps

  1. usethis::create_package() create an new R package
  2. devtools::document()
  3. usethis::use_rcpp()
  4. Copy your R code with documentation in inside R folder
  5. put cpp file in src
  6. Rcpp::compileAttributes()
  7. devtools::document() generate R documentation
  8. devtools::install() install the package

example r file

If your package name is GatorCPP

##' mean calling c
##'
##' mean calling c balalal
##' @title mean calling c
##' @param x a vector
##' @return mean of the vector
##' @author Caleb
##' @export
##' @useDynLib GatorCPP
##' @importFrom Rcpp sourceCpp
##' @examples
##' meanRC(1:10)
meanRC <- function(x){
  meanC(x)
}

example c file

#include <Rcpp.h>
using namespace Rcpp;

// [[Rcpp::export]]
double meanC(NumericVector x) {
  int n = x.size();
  double total = 0;

  for(int i = 0; i < n; ++i) {
    total += x[i];
  }
  return total / n;
}

Wrap up the package

WD <- "~/Desktop"
setwd(WD)
usethis::create_package("GatorCPP",open = FALSE) ## create the package
setwd(file.path(WD, "GatorCPP")) ## get into the package
devtools::document() ## need to do this, otherwise the next step use_rcpp won't work
usethis::use_rcpp() ## intialize rcpp
## put in R code in R and cpp code in src
Rcpp::compileAttributes() ## will generate a RcppExports.R
# devtools::load_all() ## this step seems not necessary this year
devtools::document() ## generate documentation
devtools::install() ## install the package
## use the package
library(GatorCPP)
#?meanRC
meanRC(1:10)

Reference