Biostatistical Computing, PHC 6068

R graphics ggplot2

Zhiguang Huo (Caleb)

Monday September 21, 2020


ggplot2 is based on the grammar of graphics, the idea that you can build every graph from the same few components:

ggplot2 grammar

ggplot() - a graphic is built up by adding layers

Aesthetics — aes()

load ggplot2 package

library(ggplot2) ## part of tidyverse
library(dplyr)   ## %>%, filter(), group_by(), summarize()
library(lme4)    ## sleepstudy data
library(ggrepel) ## geom_text_repel(), geom_label_repel()
mpg data

## Classes 'tbl_df', 'tbl' and 'data.frame':    234 obs. of  11 variables:
##  $ manufacturer: chr  "audi" "audi" "audi" "audi" ...
##  $ model       : chr  "a4" "a4" "a4" "a4" ...
##  $ displ       : num  1.8 1.8 2 2 2.8 2.8 3.1 1.8 1.8 2 ...
##  $ year        : int  1999 1999 2008 2008 1999 1999 2008 1999 1999 2008 ...
##  $ cyl         : int  4 4 4 4 6 6 6 4 4 4 ...
##  $ trans       : chr  "auto(l5)" "manual(m5)" "manual(m6)" "auto(av)" ...
##  $ drv         : chr  "f" "f" "f" "f" ...
##  $ cty         : int  18 21 20 21 16 18 18 18 16 20 ...
##  $ hwy         : int  29 29 31 30 26 26 27 26 25 28 ...
##  $ fl          : chr  "p" "p" "p" "p" ...
##  $ class       : chr  "compact" "compact" "compact" "compact" ...
## # A tibble: 6 x 11
##   manufacturer model displ  year   cyl trans      drv     cty   hwy fl    class 
##   <chr>        <chr> <dbl> <int> <int> <chr>      <chr> <int> <int> <chr> <chr> 
## 1 audi         a4      1.8  1999     4 auto(l5)   f        18    29 p     compa…
## 2 audi         a4      1.8  1999     4 manual(m5) f        21    29 p     compa…
## 3 audi         a4      2    2008     4 manual(m6) f        20    31 p     compa…
## 4 audi         a4      2    2008     4 auto(av)   f        21    30 p     compa…
## 5 audi         a4      2.8  1999     6 auto(l5)   f        16    26 p     compa…
## 6 audi         a4      2.8  1999     6 manual(m5) f        18    26 p     compa…

ggplot example

ggplot(data = mpg) + aes(x=displ, y=hwy) + geom_point()

ggplot: combine layers

myggplot <- ggplot(data = mpg) + aes(x=displ, y=hwy)
myggplot + geom_point()

aes – color (continuous)

## Map the integer column cyl to point colour.
ggplot(data = mpg) + 
  aes(x=displ, y=hwy, color = cyl) +
  geom_point() ## restored: the chain ended on a dangling `+` with no layer

aes – color (categorical)

## Map the character column class to point colour (one colour per class).
ggplot(data = mpg) + 
  aes(x=displ, y=hwy, color = class) +
  geom_point() ## restored: the chain ended on a dangling `+` with no layer

aes – color (absolute color)

ggplot(data = mpg) + 
  aes(x=displ, y=hwy, color = "blue") +
  geom_point() ## Doesn't work, aes only maps a variable (in the data) to a color.

ggplot(data = mpg) + 
  aes(x=displ, y=hwy, color = I("blue")) +
  geom_point() ## use I to indicate absolute color

ggplot(data = mpg) + 
  aes(x=displ, y=hwy ) +
  geom_point(color = "blue") ## or use the color here

aes – color (categorical)

## Map the character column class to point colour (one colour per class).
ggplot(data = mpg) + 
  aes(x=displ, y=hwy, color = class) +
  geom_point() ## restored: the chain ended on a dangling `+` with no layer

aes – size

## Map cyl to point size.
ggplot(data = mpg) + 
  aes(x=displ, y=hwy, size = cyl) +
  geom_point() ## restored: the chain ended on a dangling `+` with no layer

aes – size (absolute size)

ggplot(data = mpg) + 
  aes(x=displ, y=hwy, size = "3") +
  geom_point() ## Doesn't work, aes only maps a variable (in the data) to a size

ggplot(data = mpg) + 
  aes(x=displ, y=hwy, size = I(3)) +
  geom_point() ## use I to indicate absolute size

ggplot(data = mpg) + 
  aes(x=displ, y=hwy ) +
  geom_point(size = 3) ## or use the size here

ggplot(data = mpg) + 
  aes(x=displ, y=hwy ) +
  geom_point(size = rel(3)) ## relative size

aes – alpha (transparency)

## Map cyl to point transparency.
ggplot(data = mpg) + 
  aes(x=displ, y=hwy, alpha = cyl) +
  geom_point() ## restored: the chain ended on a dangling `+` with no layer

aes – alpha (absolute alpha)

ggplot(data = mpg) + 
  aes(x=displ, y=hwy, alpha = 1) +
  geom_point() ## Doesn't work, aes only maps a variable (in the data) to a alpha

ggplot(data = mpg) + 
  aes(x=displ, y=hwy, alpha = I(0.5)) +
  geom_point() ## use I to indicate absolute alpha

ggplot(data = mpg) + 
  aes(x=displ, y=hwy ) +
  geom_point(alpha = 0.5) ## or use the alpha here

aes – shape

## Drop the "suv" class, then map the remaining classes to point shape.
mpg_sub <- subset(mpg, class!="suv")
ggplot(data = mpg_sub) + 
  aes(x=displ, y=hwy, shape = class) +
  geom_point() ## restored: the chain ended on a dangling `+` with no layer

## Same plot as a pipeline: filter with dplyr, then hand the data to ggplot().
## Note the switch from %>% (between data steps) to + (between plot layers).
mpg %>% 
  filter(class!="suv") %>%
  ggplot() + 
  aes(x=displ, y=hwy, shape = class) +
  geom_point() ## restored: the chain ended on a dangling `+` with no layer

aes by variable names

## Column names held in character variables.
xvariable <- "displ"
yvariable <- "hwy"

## aes_string() takes the column names as strings instead of bare symbols.
ggplot(data = mpg) + 
  aes_string(x=xvariable, y=yvariable, color = "class") +
  geom_point() ## restored: the chain ended on a dangling `+` with no layer

Geom functions

ls(pattern = '^geom_', env = as.environment('package:ggplot2'))
##  [1] "geom_abline"     "geom_area"       "geom_bar"        "geom_bin2d"     
##  [5] "geom_blank"      "geom_boxplot"    "geom_col"        "geom_contour"   
##  [9] "geom_count"      "geom_crossbar"   "geom_curve"      "geom_density"   
## [13] "geom_density_2d" "geom_density2d"  "geom_dotplot"    "geom_errorbar"  
## [17] "geom_errorbarh"  "geom_freqpoly"   "geom_hex"        "geom_histogram" 
## [21] "geom_hline"      "geom_jitter"     "geom_label"      "geom_line"      
## [25] "geom_linerange"  "geom_map"        "geom_path"       "geom_point"     
## [29] "geom_pointrange" "geom_polygon"    "geom_qq"         "geom_qq_line"   
## [33] "geom_quantile"   "geom_raster"     "geom_rect"       "geom_ribbon"    
## [37] "geom_rug"        "geom_segment"    "geom_sf"         "geom_sf_label"  
## [41] "geom_sf_text"    "geom_smooth"     "geom_spoke"      "geom_step"      
## [45] "geom_text"       "geom_tile"       "geom_violin"     "geom_vline"

ggplot: geom_line by group

## colour = class is set globally, so the line layer inherits it and draws
## one line per class.
ggplot(data = mpg) + 
  aes(displ, hwy, colour=class) + 
  geom_point(aes(size=cyl)) + 
  geom_line() ## NOTE(review): restored final layer after a dangling `+`; confirm against the original slide

## No colour mapping here: the group aesthetic alone splits the line by class.
ggplot(data = mpg) + 
  aes(displ, hwy) + 
  geom_point(aes(size=cyl)) + 
  geom_line(aes(group=class)) ## NOTE(review): restored final layer after a dangling `+`; confirm against the original slide

ggplot: aes()

## Global colour: set in the top-level aes(), inherited by every layer.
ggplot(data = mpg) + 
  aes(displ, hwy, colour=class) + ## this is global color
  geom_point(aes(size=cyl)) + 
  geom_line() ## restored: without it the dangling `+` fused this plot with the next ggplot() call

## Local colour: set inside geom_line() only, so the points stay uncoloured.
ggplot(data = mpg) + 
  aes(displ, hwy) + 
  geom_point(aes(size=cyl)) + 
  geom_line(aes(colour=class)) ## this is local color

Line segments

ggplot(data = mpg) + 
  aes(displ, hwy, colour = class) + 
  geom_point() + 
  geom_abline(aes(intercept = 0, slope = 5), color = "green") + 
  geom_hline(aes(yintercept = 30), color = "blue") + 
  geom_vline(aes(xintercept = 5), color = "red") 

smooth by group 1

## Colour is local to the points, so the smoother sees no grouping and fits
## a single curve to all the data.
ggplot(data = mpg) + 
  aes(displ, hwy) + 
  geom_point(aes(colour=class)) + 
  geom_smooth() ## restored: this is the layer that prints the method/formula message
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

smooth by group 2

## Single straight-line fit over all points (colour is local to the points).
ggplot(data = mpg) + 
  aes(displ, hwy) + 
  geom_point(aes(colour=class)) + 
  geom_smooth(method="lm") ## NOTE(review): restored after a dangling `+`; method inferred from the neighbouring slides, confirm

smooth by group 3

ggplot(data = mpg) + 
  aes(displ, hwy) + 
  geom_point(aes(colour=class)) + 
  geom_smooth(aes(group=class), method="lm") 

smooth by group 4

ggplot(data = mpg) + 
  aes(displ, hwy, colour = class) + ## a global aes is inherited by every layer below
  geom_point() + 
  geom_smooth(method="lm") ## NOTE(review): restored after a dangling `+`; one fit per class via the inherited colour

smooth by group 5

ggplot(data = mpg) + 
  aes(displ, hwy, colour = class) + ## a global aes is inherited by every layer below
  geom_point() + 
  ## se = FALSE hides the confidence band; spelled out because F can be reassigned
  geom_smooth(method="lm", se = FALSE, size = 2) 

ggplot() boxplot

## Boxplot of highway mileage for each vehicle class.
mpgbox <- ggplot(data = mpg) + 
  aes(class, hwy) + 
  geom_boxplot() ## restored: the chain ended on a dangling `+` with no layer
mpgbox ## print the stored plot so it is actually drawn

ggplot() jitter

## Jittered points of highway mileage by class.
ggplot(data = mpg) + 
  aes(class, hwy, color=class) + 
  geom_jitter() ## restored: the chain ended on a dangling `+` with no layer

ggplot() boxplot + jitter

## Layers are drawn in order: boxes first, jittered points on top.
ggplot(data = mpg) + 
  aes(class, hwy, color=class) + 
  geom_boxplot() + 
  geom_jitter() ## restored second layer after a dangling `+`

## Reverse order: jittered points first, boxes drawn over them.
ggplot(data = mpg) + 
  aes(class, hwy, color=class) + 
  geom_jitter() + 
  geom_boxplot() ## restored second layer after a dangling `+`

ggplot() violin plot

## Violin plot of highway mileage by class, filled by class.
ggplot(data = mpg) + 
  aes(class, hwy, fill=class) + 
  geom_violin() ## restored: the chain ended on a dangling `+` with no layer

ggplot() bar plot 1

## Bar heights are row counts per class (geom_bar's default stat).
ggplot(mpg) + 
  aes(class) + 
  geom_bar() ## restored: the chain ended on a dangling `+` with no layer

## For bars, colour controls the outline only; use fill for the interior.
ggplot(mpg) + 
  aes(class, color = class) + 
  geom_bar() ## restored: the chain ended on a dangling `+` with no layer

ggplot() bar plot 2

## Fill by cylinder count; geom_bar() stacks the groups by default.
ggplot(mpg) + 
  aes(class, fill=as.factor(cyl)) + 
  geom_bar() ## restored: the chain ended on a dangling `+` with no layer

ggplot() bar plot 3

ggplot(mpg) + 
  aes(class, fill=as.factor(cyl)) + 
  geom_bar(position="dodge")  #side by side

ggplot() bar plot: how to specify error bar

## Per-class mean and standard deviation of engine displacement.
mpgSummary <- mpg %>%
  group_by(class) %>%
  summarize(meanDispl = mean(displ), sdDispl = sd(displ)) ## sd is standard deviation, standard error se = sd/sqrt(n)
## Bars show the mean; error bars span mean +/- one sd.
ggplot(data = mpgSummary) + 
  aes(x=class, y=meanDispl, fill=class) + 
  geom_bar(position=position_dodge(), stat="identity",
           colour="black", # Use black outlines,
           size=.3) +      # Thinner lines
  geom_errorbar(aes(ymin=meanDispl-sdDispl, ymax=meanDispl+sdDispl),
                size=.3,    # Thinner lines
                width=.2,   # Narrow caps
                position=position_dodge(.9)) # restored: the call was never closed

ggplot() histogram simple example

## Histogram of highway mileage.
ggplot(data = mpg) + 
  aes(x = hwy) + 
  geom_histogram() ## restored: the chain ended on a dangling `+` with no layer
ggplot() histogram fill by color

ggplot(data = mpg) + 
  aes(x = hwy) + 
  geom_histogram(aes(fill = class))
ggplot() histogram facets by group (1)

ggplot(data = mpg) + 
  aes(x = hwy) + 
  geom_histogram(aes(fill = class)) + 
  facet_wrap(~ class)
ggplot() histogram facets by group (2)

ggplot(data = mpg) + 
  aes(x = hwy) + 
  geom_histogram(aes(fill = class)) + 
  facet_grid(. ~ class) ## or facet_grid(cols = vars(class))
ggplot() histogram facets by group (3)

ggplot(data = mpg) + 
  aes(x = hwy) + 
  geom_histogram(aes(fill = class)) + 
  facet_grid(class ~ .) ## or facet_grid(rows = vars(class))
ggplot() histogram facets by group (4)

ggplot(data = mpg) + 
  aes(x = hwy) + 
  geom_histogram(aes(fill = class)) + 
  facet_grid(drv ~ class) ## or facet_grid(rows = vars(drv), cols = vars(class))
longitudinal data visualization

sleepstudy: Reaction times in a sleep deprivation study

sleepstudy: Reaction times in a sleep deprivation study

head(sleepstudy, n=5)
##   Reaction Days Subject
## 1 249.5600    0     308
## 2 258.7047    1     308
## 3 250.8006    2     308
## 4 321.4398    3     308
## 5 356.8519    4     308

spaghetti plot

## One trajectory per subject: colour = Subject also defines the groups.
ggplot(data=sleepstudy) + 
  aes(x = Days, y=Reaction, colour = Subject) +
  geom_path() ## NOTE(review): restored line layer after a dangling `+`; geom_line() would draw the same here

individual subject lm smooth

## One linear fit per subject (groups come from colour = Subject).
ggplot(data=sleepstudy) + 
  aes(x = Days, y=Reaction, colour = Subject) +
  geom_smooth(method="lm") ## removed the trailing `+` that left the expression unfinished

mean trajectory (with SE bar)

## Per-day mean, standard deviation and standard error of reaction time.
sleepSummary <- sleepstudy %>% 
  group_by(Days) %>%
  summarize(Mean = mean(Reaction), SD = sd(Reaction), SE = sd(Reaction)/sqrt(n()))
## Mean trajectory with error bars spanning mean +/- one SE.
ggplot(data=sleepSummary) + 
  aes(x = Days, y=Mean) +
  geom_path() + 
  geom_errorbar(aes(ymin=Mean-SE, ymax=Mean+SE),
                  size=0.5,    # Thinner lines
                  width=.2)    # Narrow caps; restored: the call was never closed

Add text annotations to a graph

Text annotations using geom_text()

# Subset 10 rows
ss <- sample(1:32, 10)
df <- mtcars[ss, ]

# Base scatter plot; label is mapped globally so text layers can inherit it
sp <- ggplot(data = df) +
  aes(wt, mpg, label = rownames(df)) +
  geom_point() # restored: the dangling `+` swallowed the next statement into this assignment
# Add texts
sp + geom_text() ## geom_text need the label aes

Other experiment

sp + geom_text(size=6)
sp +  geom_text(hjust=0, vjust=0)
sp + geom_text(aes(fontface=2))
sp + geom_text(family = "Times New Roman")
sp + geom_text(aes(color=factor(cyl)))
sp + geom_text(aes(size=wt))

Text annotations using geom_label()

# Base scatter plot; label is mapped globally so label layers can inherit it
sp <- ggplot(data = df) +
  aes(wt, mpg, label = rownames(df)) +
  geom_point() # restored: the dangling `+` swallowed the next statement into this assignment
# Add texts
sp + geom_label()

Add a text annotation at a particular coordinate

# Solution 1
sp + geom_text(x=3, y=20, label="Scatter plot")

ggrepel: Avoid overlapping of text labels


Create a scatter plot and add labels

p <- ggplot(mtcars, aes(wt, mpg)) +
  geom_point(color = 'red') 
p + geom_text(aes(label = rownames(mtcars)),
              size = 3.5)

Use geom_text_repel

p + geom_text_repel(aes(label = rownames(mtcars)),
                    size = 3.5) 

Use geom_label_repel

p + geom_label_repel(aes(label = rownames(mtcars)))

## p + geom_label_repel(aes(label = rownames(mtcars), fill = factor(cyl)), 
##                         color = 'white', size = 3.5
##                    )


p <- ggplot(mpg) + 
  geom_point(aes(x = displ, y = hwy, colour=factor(cyl))) + 
  labs(title = "New plot title", x = "New x label", y = "New y label")


examples on different themes

## Base plot reused by the theme examples.
p <- ggplot(mpg) + 
  geom_point(aes(x = displ, y = hwy, colour=factor(cyl)))
## The trailing `+` was removed: it made the next line part of this assignment.
p ## default theme_grey

p + theme_bw()

# p + theme_linedraw()
# p + theme_light()
# p + theme_dark()
# p + theme_minimal()
# p + theme_classic()
# p + theme_void()

More about the theme

p + theme(text = element_text(size=20),
        axis.text.x = element_text(angle=90, hjust=1,colour="red")) 

Four elements to control the theme


plot <- ggplot(mpg, aes(displ, hwy)) + geom_point()

## element_blank() removes an element entirely.
plot + theme(
  panel.background = element_blank(),
  axis.text = element_blank()
) ## restored closing parenthesis

## element_text() styles text; rel(1.5) is 1.5 times the inherited size.
plot + theme(
  axis.text = element_text(colour = "red", size = rel(1.5))
) ## restored closing parenthesis

## element_line() styles lines; here the axis lines get arrow heads.
plot + theme(
  axis.line = element_line(arrow = arrow())
) ## restored closing parenthesis

## element_rect() styles rectangles: the panel and the whole-plot background.
plot + theme(
  panel.background = element_rect(fill = "white"),
  plot.margin = margin(2, 2, 2, 2, "cm"),
  plot.background = element_rect(
    fill = "grey90",
    colour = "black",
    size = 1
  ) ## restored: closes element_rect()
)   ## restored: closes theme()
## all changes are relative to the default value
## for more options, see

No legend

ggplot(mpg) + 
  geom_point(aes(x = displ, y = hwy, colour=factor(cyl))) + 
  theme(legend.position = "none") 

One of my favourite themes (1)

black.bold.text <- element_text(face = "bold", color = "black", size=20)
ggplot(mpg, aes(displ, hwy, colour=class)) + geom_point() + 
    labs(title="hwy vs displ") + 
    theme_bw() + 
    theme(text = black.bold.text) 

One of my favourite themes (2)

black.bold.text <- element_text(face = "bold", color = "black", size=20)
ggplot(mpg, aes(displ, hwy, colour=class)) + geom_point() + 
    labs(title="hwy vs displ") + 
    theme_bw() + 
    theme(text = black.bold.text, panel.grid =element_blank()) 

Change font

black.bold.text <- element_text(face = "bold", color = "black", size=20)
red.italic.text <- element_text(face = "italic", color = "red", size=15)

ggplot(mpg, aes(displ, hwy, colour=class)) + geom_point() + 
    labs(title="hwy vs displ") + 
    theme_bw() + 
    theme(axis.text = black.bold.text , axis.title = black.bold.text, 
          legend.title = red.italic.text, 
          legend.text = black.bold.text) 

Create your own discrete scale

p <- ggplot(mtcars, aes(mpg, wt)) +
  geom_point(aes(colour = factor(cyl)))

p + scale_colour_manual(values = c("red", "blue", "green"))

cols <- c("8" = "red", "4" = "blue", "6" = "darkgreen", "10" = "orange")
p + scale_colour_manual(values = cols)

## Apply one manual palette to both the colour and fill aesthetics.
ggplot(mtcars) +
  aes(mpg, wt, colour = factor(cyl), fill = factor(cyl)) +
   geom_point() + 
  scale_colour_manual( ## restored: the scale call's opening line was missing
    values = cols,
    aesthetics = c("colour", "fill")
  )

Create your own axis ticks

p <- ggplot(mtcars, aes(mpg, wt)) +
  geom_point(aes(colour = factor(cyl))) 
p + scale_x_continuous(breaks = c(15,25),
    labels = c("A", "B"),
    name = "My MPG") 

Stat transformation

empirical CDF

df <- data.frame(x = rnorm(1000))
ggplot(df, aes(x)) + stat_ecdf(geom = "step")

n <- 100
df <- data.frame(x = c(rnorm(n, 0, 3), rnorm(n, 0, 10)),
                 g = gl(2, n))
ggplot(df, aes(x, colour = g)) + stat_ecdf()


## Kernel density of n standard-normal draws, with the normal density
## overlaid in red.
n <- 100
df <- data.frame(
  x = rnorm(n)
) ## restored closing parenthesis
x <- df$x
base <- ggplot(df, aes(x)) + geom_density()
base + stat_function(fun = dnorm, colour = "red") + xlim(c(-3,3))


ggplot(mpg, aes(x = displ, y = hwy)) + geom_point() +   stat_ellipse()

stat_ellipse by group

ggplot(mpg, aes(x = displ, y = hwy, color=displ > 4)) + geom_point() +   stat_ellipse()


Previous example on mpg

## Base plot for the zoom-vs-limits comparison.
p <- ggplot(mpg, aes(displ, hwy)) +
  geom_point() +
  geom_smooth() ## NOTE(review): restored after a dangling `+`; a smoother makes the zoom/limits difference visible, confirm against the original slide

Setting the limits on the coordinate system performs a visual zoom.

p + coord_cartesian(xlim = c(3, 5), expand = FALSE)
Setting the limits on a scale converts all values outside the range to NA.

p + xlim(3, 5)
# the same as p + scale_x_continuous(limits = c(3, 5))

resize the plot

p <- ggplot(mpg, aes(displ, hwy)) +  geom_point()
p + coord_fixed(ratio = 0.5)

p + coord_fixed(ratio = 0.1)

flip x and y

## Horizontal boxplots: draw as usual, then swap the axes.
ggplot(mpg, aes(class, hwy)) +
  geom_boxplot() +
  coord_flip() ## restored: the chain ended on a dangling `+`

ggplot Cheat Sheet