Biostatistical Computing, PHC 6068

R graphics ggplot2

Zhiguang Huo (Caleb)

Monday September 18, 2017

ggplot2

ggplot2 is based on the grammar of graphics, the idea that you can build every graph from the same few components: a data set, a set of geoms (visual marks that represent data points), and a coordinate system.

ggplot2 cheatsheet: https://www.rstudio.com/wp-content/uploads/2015/03/ggplot2-cheatsheet.pdf

ggplot2 grammar

ggplot2 usage – qplot() and ggplot()

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.3.2

qplot()

mpg data

# Show the structure of mpg: one line per column with its type and first values.
str(mpg)
## Classes 'tbl_df', 'tbl' and 'data.frame':    234 obs. of  11 variables:
##  $ manufacturer: chr  "audi" "audi" "audi" "audi" ...
##  $ model       : chr  "a4" "a4" "a4" "a4" ...
##  $ displ       : num  1.8 1.8 2 2 2.8 2.8 3.1 1.8 1.8 2 ...
##  $ year        : int  1999 1999 2008 2008 1999 1999 2008 1999 1999 2008 ...
##  $ cyl         : int  4 4 4 4 6 6 6 4 4 4 ...
##  $ trans       : chr  "auto(l5)" "manual(m5)" "manual(m6)" "auto(av)" ...
##  $ drv         : chr  "f" "f" "f" "f" ...
##  $ cty         : int  18 21 20 21 16 18 18 18 16 20 ...
##  $ hwy         : int  29 29 31 30 26 26 27 26 25 28 ...
##  $ fl          : chr  "p" "p" "p" "p" ...
##  $ class       : chr  "compact" "compact" "compact" "compact" ...
# Preview the first rows of mpg.
head(mpg)
## # A tibble: 6 × 11
##   manufacturer model displ  year   cyl      trans   drv   cty   hwy    fl
##          <chr> <chr> <dbl> <int> <int>      <chr> <chr> <int> <int> <chr>
## 1         audi    a4   1.8  1999     4   auto(l5)     f    18    29     p
## 2         audi    a4   1.8  1999     4 manual(m5)     f    21    29     p
## 3         audi    a4   2.0  2008     4 manual(m6)     f    20    31     p
## 4         audi    a4   2.0  2008     4   auto(av)     f    21    30     p
## 5         audi    a4   2.8  1999     6   auto(l5)     f    16    26     p
## 6         audi    a4   2.8  1999     6 manual(m5)     f    18    26     p
## # ... with 1 more variables: class <chr>

basic scatter plot

# Scatter plot with displ on the x axis and hwy on the y axis.
qplot(x = displ, y = hwy, data = mpg)

## aesthetics: displ, hwy; data: mpg

scatter plot with color

# Same scatter plot, with the point colour mapped to class.
qplot(x = displ, y = hwy, data = mpg, colour = class)

## aesthetics: displ, hwy, class; data: mpg

scatter plot with shape

# qplot supports a maximum of 6 shapes, so the "suv" class is dropped
# before class is mapped to the point shape.
mpg_sub <- subset(mpg, class!="suv")
# aesthetics: displ, hwy, class; data: mpg_sub
qplot(displ, hwy, shape = class, data = mpg_sub)

line plot with geom = “line”

# Draw the observations as a line (geom "line" connects them in order of x).
qplot(x = displ, y = hwy, data = mpg, geom = "line")

## aesthetics: displ, hwy
## data: mpg
## geometries: line

path plot with geom = “path”

# Draw the observations as a path (geom "path" connects them in data order).
qplot(x = displ, y = hwy, data = mpg, geom = "path")

## aesthetics: displ, hwy
## data: mpg
## geometries: path

More about geom – geom = “boxplot”

# One boxplot of displ per class.
qplot(x = class, y = displ, data = mpg, geom = "boxplot")

## aesthetics: class, displ
## data: mpg
## geometries: boxplot

More about geom – geom = “jitter”

# Jittered points of displ within each class.
qplot(x = class, y = displ, data = mpg, geom = "jitter")

## aesthetics: class, displ
## data: mpg
## geometries: jitter

More about geom – geom = c(“boxplot”, “jitter”)

# Combine two geoms in one plot by passing a character vector to geom.
qplot(class, displ, data = mpg, geom = c("boxplot", "jitter")) 

## aesthetic: displ, class
## data: mpg
## Geometries: boxplot, jitter

More about geom – geom = “histogram”

# Histogram of displ, using the default binning.
qplot(x = displ, data = mpg, geom = "histogram")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## aesthetics: displ
## data: mpg
## geometries: histogram

More about geom – geom = “density”

# Density curve of displ.
qplot(x = displ, data = mpg, geom = "density")

## aesthetics: displ
## data: mpg
## geometries: density

Facets

# Density curve of displ, drawn in one panel per class.
qplot(x = displ, data = mpg, facets = ~class, geom = "density")

## aesthetics: displ
## data: mpg
## geometries: density
## facets: ~class

ggplot() - graphics are added up by different layers

Compared to qplot(), it’s easier to use multiple datasets in ggplot().

Aesthetics

ggplot example

# Scatter plot of hwy against displ, with the point size mapped to cyl.
ggplot(mpg, aes(x = displ, y = hwy, size = cyl)) +
  geom_point()

ggplot: combine layers

# Store the data and aesthetic mappings first, then add the point layer.
myggplot <- ggplot(mpg, aes(x = displ, y = hwy, colour = cyl))
myggplot +
  geom_point()

ggplot: geom_line by group

# Points sized by cyl, plus one line per class (both coloured by class).
ggplot(data = mpg, mapping = aes(x = displ, y = hwy, colour = class)) +
  geom_point(mapping = aes(size = cyl)) +
  geom_line(mapping = aes(group = class))

Geom functions

# List every object in the attached ggplot2 package whose name starts with
# "geom_". `envir` is spelled out in full rather than relying on partial
# argument matching of `env`.
ls(pattern = "^geom_", envir = as.environment("package:ggplot2"))
##  [1] "geom_abline"     "geom_area"       "geom_bar"       
##  [4] "geom_bin2d"      "geom_blank"      "geom_boxplot"   
##  [7] "geom_col"        "geom_contour"    "geom_count"     
## [10] "geom_crossbar"   "geom_curve"      "geom_density"   
## [13] "geom_density_2d" "geom_density2d"  "geom_dotplot"   
## [16] "geom_errorbar"   "geom_errorbarh"  "geom_freqpoly"  
## [19] "geom_hex"        "geom_histogram"  "geom_hline"     
## [22] "geom_jitter"     "geom_label"      "geom_line"      
## [25] "geom_linerange"  "geom_map"        "geom_path"      
## [28] "geom_point"      "geom_pointrange" "geom_polygon"   
## [31] "geom_qq"         "geom_quantile"   "geom_raster"    
## [34] "geom_rect"       "geom_ribbon"     "geom_rug"       
## [37] "geom_segment"    "geom_smooth"     "geom_spoke"     
## [40] "geom_step"       "geom_text"       "geom_tile"      
## [43] "geom_violin"     "geom_vline"

smooth by group 1

# Scatter plot with one linear fit ("lm") per class and no confidence band.
# FALSE is written out because the shorthand F is an ordinary variable that
# can be reassigned.
ggplot(data = mpg, aes(displ, hwy, colour = class)) +
  geom_point() +
  geom_smooth(aes(group = class), method = "lm", se = FALSE, size = 1)

smooth by group 2

# One linear fit ("lm") per class, drawn with its confidence band and a
# thicker line. TRUE is written out because the shorthand T is an ordinary
# variable that can be reassigned.
ggplot(data = mpg, aes(displ, hwy, colour = class)) +
  geom_smooth(aes(group = class), method = "lm", se = TRUE, size = 2)

ggplot() boxplot

# Boxplot of hwy per class, each box filled according to its class.
mpgbox <- ggplot(mpg, aes(x = class, y = hwy)) +
  geom_boxplot(mapping = aes(fill = class))
mpgbox

ggplot() histogram basic

# Histogram of hwy with a bin width of 3 and a constant fill colour.
ggplot(mpg, aes(x = hwy)) +
  geom_histogram(fill = "steelblue", binwidth = 3)

ggplot() histogram fill by color

# Histogram of hwy where the bar fill is mapped to class.
ggplot(mpg, aes(x = hwy)) +
  geom_histogram(mapping = aes(fill = class), binwidth = 3)

ggplot() histogram facets by group

# Histogram of hwy with one facet row per class.
ggplot(mpg, aes(x = hwy, fill = class)) +
  geom_histogram(binwidth = 3) +
  facet_grid(class ~ .)

bar plot with standard deviation

# Per-class summary of displ: the class label, the mean and the standard
# deviation, one row per class.
mpgSummary <- data.frame(
  class = tapply(mpg$class, mpg$class, unique),
  meanDispl = tapply(mpg$displ, mpg$class, mean),
  sdDispl = tapply(mpg$displ, mpg$class, sd)
)

# Bars show the mean; error bars span mean - sd to mean + sd.
ggplot(mpgSummary, aes(x = class, y = meanDispl, fill = class)) +
  geom_bar(
    stat = "identity",
    position = position_dodge(),
    colour = "black", # black outlines
    size = .3         # thinner lines
  ) +
  geom_errorbar(
    aes(ymin = meanDispl - sdDispl, ymax = meanDispl + sdDispl),
    width = .2,
    size = .3, # thinner lines
    position = position_dodge(.9)
  )

facet

ggplot() histogram facet_wrap

# Histogram of hwy with one wrapped panel per class.
ggplot(mpg, aes(x = hwy, fill = class)) +
  geom_histogram(binwidth = 3) +
  facet_wrap(~class)

facet_null: back to single panel

# facet_null() is added after facet_grid(), which brings the plot back to
# a single panel.
ggplot(mpg, aes(x = hwy, fill = class)) +
  geom_histogram(binwidth = 3) +
  facet_grid(class ~ .) +
  facet_null()

bar plot

# Bar counts per class, filled by cyl, under three position adjustments.
ggplot(mpg, aes(x = class, fill = as.factor(cyl))) +
  geom_bar(position = "dodge") # side by side

ggplot(mpg, aes(x = class, fill = as.factor(cyl))) +
  geom_bar(position = "fill") # fill

ggplot(mpg, aes(x = class, fill = as.factor(cyl))) +
  geom_bar(position = "stack") # default

Stat transformation

empirical CDF

# Empirical CDF of 1000 standard-normal draws, drawn as a step function.
# The seed is fixed so that the simulated data, and therefore the figures,
# are reproducible from run to run.
set.seed(32611)
df <- data.frame(x = rnorm(1000))
ggplot(df, aes(x)) + stat_ecdf(geom = "step")

# Empirical CDFs of two groups of n draws each: both have mean 0, with
# standard deviation 3 (group 1) and 10 (group 2).
n <- 100
df <- data.frame(
  x = c(rnorm(n, mean = 0, sd = 3), rnorm(n, mean = 0, sd = 10)),
  g = gl(2, n)
)
ggplot(df, aes(x, colour = g)) + stat_ecdf()

stat_function

# Compare the density estimate of n standard-normal draws with the
# theoretical normal density.
n <- 100
set.seed(32611)
df <- data.frame(x = rnorm(n))
# aes(x) takes x from df, so no separate global copy of the column is needed.
base <- ggplot(df, aes(x)) + geom_density()
# Overlay dnorm in red and set the x-axis limits to -3 and 3.
base + stat_function(fun = dnorm, colour = "red") + xlim(c(-3, 3))

stat_ellipse

# Scatter plot of hwy against displ with an ellipse drawn around the points.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  stat_ellipse()

stat_ellipse by group

# Colour (and therefore group) by whether displ exceeds 4, giving one
# ellipse per group.
ggplot(mpg, aes(x = displ, y = hwy, colour = displ > 4)) +
  geom_point() +
  stat_ellipse()

Coordinate

Our old friend mpg

# Base plot reused by the coordinate examples: points plus a smoother.
p <- ggplot(mpg, aes(x = displ, y = hwy)) + geom_point() + geom_smooth()

p
## `geom_smooth()` using method = 'loess'

Setting the limits on the coordinate system performs a visual zoom.

# Restrict the visible x range to 3-5 through the coordinate system.
p + coord_cartesian(xlim = c(3, 5), expand = FALSE)
## `geom_smooth()` using method = 'loess'

Setting the limits on a scale converts all values outside the range to NA.

# Restrict the x scale to 3-5; rows outside the range are removed, as the
# warnings below report.
p + xlim(3, 5)
## `geom_smooth()` using method = 'loess'
## Warning: Removed 136 rows containing non-finite values (stat_smooth).
## Warning: Removed 136 rows containing missing values (geom_point).

# the same as p + scale_x_continuous(limits = c(3, 5))

resize the plot

# Redefine p as a plain scatter plot, then draw it with two fixed
# aspect ratios.
p <- ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point()
p +
  coord_fixed(ratio = 0.5)

p +
  coord_fixed(ratio = 0.1)

flip x and y

# Boxplot of hwy per class with the x and y axes swapped.
ggplot(mpg, aes(x = class, y = hwy)) + geom_boxplot() + coord_flip()

draw a world map

# Load the "world" map outlines as a data frame.
world <- map_data("world")
## Warning: package 'maps' was built under R version 3.3.2
# Draw one path per group, with axis breaks every 30 units on y (-60 to 60)
# and every 45 units on x (-180 to 180).
worldmap <- ggplot(world, aes(x=long, y=lat, group=group)) +
  geom_path() +
  scale_y_continuous(breaks = (-2:2) * 30) +
  scale_x_continuous(breaks = (-4:4) * 45)

worldmap

# Redraw with the "ortho" projection.
# NOTE(review): orientation is presumably c(latitude, longitude, rotation) -- confirm against ?coord_map.
worldmap + coord_map("ortho", orientation = c(41, -74, 0))

polar coordinates for pie chart

# A single full-width stacked bar of cyl counts, wrapped into a pie chart
# by mapping y to the polar angle.
pie <- ggplot(mpg, aes(x = factor(1), fill = factor(cyl))) + geom_bar(width = 1)
pie +
  coord_polar(theta = "y")

Theme

examples on different themes

# Scatter plot coloured by cyl, one panel per class: default theme first,
# then theme_bw().
p <- ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy, colour = factor(cyl))) +
  facet_wrap(~class)
p

p +
  theme_bw()

More about theme

# Set all text to size 20 and rotate the x-axis tick labels by 90 degrees.
p <- ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy, colour = factor(cyl)))
p +
  theme(
    text = element_text(size = 20),
    axis.text.x = element_text(angle = 90, hjust = 1)
  )

labels

# Replace the plot title and both axis labels.
p <- ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy, colour = factor(cyl)))
p + labs(title = "New plot title", x = "New x label", y = "New y label")

## for more, see ?labs
# Same labels, with the x-axis tick labels restyled (font family and colour).
p +
  labs(title = "New plot title", x = "New x label", y = "New y label") +
  theme(axis.text.x = element_text(family = "Arial", colour = "red"))

## for more, see ?theme

How to learn more about ggplot2

  1. ggplot2 cheatsheet: https://www.rstudio.com/wp-content/uploads/2015/03/ggplot2-cheatsheet.pdf
  2. google
# Extract the R code of the R Markdown source into a script.
# The output name has no trailing space, so the file written is exactly
# "Rgraph_ggplot2.R".
knitr::purl("Rgraph_ggplot2.rmd", output = "Rgraph_ggplot2.R", documentation = 2)
## 
## 
## processing file: Rgraph_ggplot2.rmd
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |.                                                                |   1%
  |                                                                       
  |.                                                                |   2%
  |                                                                       
  |..                                                               |   3%
  |                                                                       
  |...                                                              |   4%
  |                                                                       
  |...                                                              |   5%
  |                                                                       
  |....                                                             |   6%
  |                                                                       
  |.....                                                            |   7%
  |                                                                       
  |.....                                                            |   8%
  |                                                                       
  |......                                                           |   9%
  |                                                                       
  |.......                                                          |  10%
  |                                                                       
  |.......                                                          |  11%
  |                                                                       
  |........                                                         |  12%
  |                                                                       
  |.........                                                        |  14%
  |                                                                       
  |.........                                                        |  15%
  |                                                                       
  |..........                                                       |  16%
  |                                                                       
  |...........                                                      |  17%
  |                                                                       
  |............                                                     |  18%
  |                                                                       
  |............                                                     |  19%
  |                                                                       
  |.............                                                    |  20%
  |                                                                       
  |..............                                                   |  21%
  |                                                                       
  |..............                                                   |  22%
  |                                                                       
  |...............                                                  |  23%
  |                                                                       
  |................                                                 |  24%
  |                                                                       
  |................                                                 |  25%
  |                                                                       
  |.................                                                |  26%
  |                                                                       
  |..................                                               |  27%
  |                                                                       
  |..................                                               |  28%
  |                                                                       
  |...................                                              |  29%
  |                                                                       
  |....................                                             |  30%
  |                                                                       
  |....................                                             |  31%
  |                                                                       
  |.....................                                            |  32%
  |                                                                       
  |......................                                           |  33%
  |                                                                       
  |......................                                           |  34%
  |                                                                       
  |.......................                                          |  35%
  |                                                                       
  |........................                                         |  36%
  |                                                                       
  |........................                                         |  38%
  |                                                                       
  |.........................                                        |  39%
  |                                                                       
  |..........................                                       |  40%
  |                                                                       
  |..........................                                       |  41%
  |                                                                       
  |...........................                                      |  42%
  |                                                                       
  |............................                                     |  43%
  |                                                                       
  |............................                                     |  44%
  |                                                                       
  |.............................                                    |  45%
  |                                                                       
  |..............................                                   |  46%
  |                                                                       
  |..............................                                   |  47%
  |                                                                       
  |...............................                                  |  48%
  |                                                                       
  |................................                                 |  49%
  |                                                                       
  |................................                                 |  50%
  |                                                                       
  |.................................                                |  51%
  |                                                                       
  |..................................                               |  52%
  |                                                                       
  |...................................                              |  53%
  |                                                                       
  |...................................                              |  54%
  |                                                                       
  |....................................                             |  55%
  |                                                                       
  |.....................................                            |  56%
  |                                                                       
  |.....................................                            |  57%
  |                                                                       
  |......................................                           |  58%
  |                                                                       
  |.......................................                          |  59%
  |                                                                       
  |.......................................                          |  60%
  |                                                                       
  |........................................                         |  61%
  |                                                                       
  |.........................................                        |  62%
  |                                                                       
  |.........................................                        |  64%
  |                                                                       
  |..........................................                       |  65%
  |                                                                       
  |...........................................                      |  66%
  |                                                                       
  |...........................................                      |  67%
  |                                                                       
  |............................................                     |  68%
  |                                                                       
  |.............................................                    |  69%
  |                                                                       
  |.............................................                    |  70%
  |                                                                       
  |..............................................                   |  71%
  |                                                                       
  |...............................................                  |  72%
  |                                                                       
  |...............................................                  |  73%
  |                                                                       
  |................................................                 |  74%
  |                                                                       
  |.................................................                |  75%
  |                                                                       
  |.................................................                |  76%
  |                                                                       
  |..................................................               |  77%
  |                                                                       
  |...................................................              |  78%
  |                                                                       
  |...................................................              |  79%
  |                                                                       
  |....................................................             |  80%
  |                                                                       
  |.....................................................            |  81%
  |                                                                       
  |.....................................................            |  82%
  |                                                                       
  |......................................................           |  83%
  |                                                                       
  |.......................................................          |  84%
  |                                                                       
  |........................................................         |  85%
  |                                                                       
  |........................................................         |  86%
  |                                                                       
  |.........................................................        |  88%
  |                                                                       
  |..........................................................       |  89%
  |                                                                       
  |..........................................................       |  90%
  |                                                                       
  |...........................................................      |  91%
  |                                                                       
  |............................................................     |  92%
  |                                                                       
  |............................................................     |  93%
  |                                                                       
  |.............................................................    |  94%
  |                                                                       
  |..............................................................   |  95%
  |                                                                       
  |..............................................................   |  96%
  |                                                                       
  |...............................................................  |  97%
  |                                                                       
  |................................................................ |  98%
  |                                                                       
  |................................................................ |  99%
  |                                                                       
  |.................................................................| 100%
## output file: Rgraph_ggplot2.R
## [1] "Rgraph_ggplot2.R "