Zhiguang Huo (Caleb)
Thursday September 14, 2023
ggplot2 is based on the grammer of graphics, the idea that you can build every graph from the same few components:
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ tibble  3.1.8     ✔ dplyr   1.1.0
## ✔ tidyr   1.3.0     ✔ stringr 1.5.0
## ✔ readr   2.1.4     ✔ forcats 1.0.0
## ✔ purrr   1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## tibble [234 × 11] (S3: tbl_df/tbl/data.frame)
##  $ manufacturer: chr [1:234] "audi" "audi" "audi" "audi" ...
##  $ model       : chr [1:234] "a4" "a4" "a4" "a4" ...
##  $ displ       : num [1:234] 1.8 1.8 2 2 2.8 2.8 3.1 1.8 1.8 2 ...
##  $ year        : int [1:234] 1999 1999 2008 2008 1999 1999 2008 1999 1999 2008 ...
##  $ cyl         : int [1:234] 4 4 4 4 6 6 6 4 4 4 ...
##  $ trans       : chr [1:234] "auto(l5)" "manual(m5)" "manual(m6)" "auto(av)" ...
##  $ drv         : chr [1:234] "f" "f" "f" "f" ...
##  $ cty         : int [1:234] 18 21 20 21 16 18 18 18 16 20 ...
##  $ hwy         : int [1:234] 29 29 31 30 26 26 27 26 25 28 ...
##  $ fl          : chr [1:234] "p" "p" "p" "p" ...
##  $ class       : chr [1:234] "compact" "compact" "compact" "compact" ...
## # A tibble: 6 × 11
##   manufacturer model displ  year   cyl trans      drv     cty   hwy fl    class 
##   <chr>        <chr> <dbl> <int> <int> <chr>      <chr> <int> <int> <chr> <chr> 
## 1 audi         a4      1.8  1999     4 auto(l5)   f        18    29 p     compa…
## 2 audi         a4      1.8  1999     4 manual(m5) f        21    29 p     compa…
## 3 audi         a4      2    2008     4 manual(m6) f        20    31 p     compa…
## 4 audi         a4      2    2008     4 auto(av)   f        21    30 p     compa…
## 5 audi         a4      2.8  1999     6 auto(l5)   f        16    26 p     compa…
## 6 audi         a4      2.8  1999     6 manual(m5) f        18    26 p     compa…
ggplot(data = mpg) + 
  aes(x=displ, y=hwy, color = "blue") +
  geom_point() ## Doesn't work, aes only maps a variable (in the data) to a color.
ggplot(data = mpg) + 
  aes(x=displ, y=hwy, color = I("blue")) +
  geom_point() ## use I to indicate absolute color
ggplot(data = mpg) + 
  aes(x=displ, y=hwy ) +
  geom_point(color = "blue") ## or use the color here (outside aes)ggplot(data = mpg) + 
  aes(x=displ, y=hwy, size = "3") +
  geom_point() ## Doesn't work, aes only maps a variable (in the data) to a size
ggplot(data = mpg) + 
  aes(x=displ, y=hwy, size = I(3)) +
  geom_point() ## use I to indicate absolute size
ggplot(data = mpg) + 
  aes(x=displ, y=hwy ) +
  geom_point(size = 3) ## or use the size hereggplot(data = mpg) + 
  aes(x=displ, y=hwy, alpha = 1) +
  geom_point() ## Doesn't work, aes only maps a variable (in the data) to a alpha
ggplot(data = mpg) + 
  aes(x=displ, y=hwy, alpha = I(0.5)) +
  geom_point() ## use I to indicate absolute alpha
ggplot(data = mpg) + 
  aes(x=displ, y=hwy ) +
  geom_point(alpha = 0.5) ## or use the alpha herempg_sub <- subset(mpg, class!="suv")
ggplot(data = mpg_sub) + 
  aes(x=displ, y=hwy, shape = class) +
  geom_point()xvariable = "displ"
yvariable = "hwy"
ggplot(data = mpg) + 
  aes_string(x=xvariable, y=yvariable, color = "class") +
  geom_point()## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
##  [1] "geom_abline"            "geom_area"              "geom_bar"              
##  [4] "geom_bin_2d"            "geom_bin2d"             "geom_blank"            
##  [7] "geom_boxplot"           "geom_col"               "geom_contour"          
## [10] "geom_contour_filled"    "geom_count"             "geom_crossbar"         
## [13] "geom_curve"             "geom_density"           "geom_density_2d"       
## [16] "geom_density_2d_filled" "geom_density2d"         "geom_density2d_filled" 
## [19] "geom_dotplot"           "geom_errorbar"          "geom_errorbarh"        
## [22] "geom_freqpoly"          "geom_function"          "geom_hex"              
## [25] "geom_histogram"         "geom_hline"             "geom_jitter"           
## [28] "geom_label"             "geom_line"              "geom_linerange"        
## [31] "geom_map"               "geom_path"              "geom_point"            
## [34] "geom_pointrange"        "geom_polygon"           "geom_qq"               
## [37] "geom_qq_line"           "geom_quantile"          "geom_raster"           
## [40] "geom_rect"              "geom_ribbon"            "geom_rug"              
## [43] "geom_segment"           "geom_sf"                "geom_sf_label"         
## [46] "geom_sf_text"           "geom_smooth"            "geom_spoke"            
## [49] "geom_step"              "geom_text"              "geom_tile"             
## [52] "geom_violin"            "geom_vline"
ggplot(data = mpg) + 
  aes(displ, hwy, colour=class) + ## this is global color
  geom_point() + 
  geom_line()ggplot(data = mpg) + 
  aes(displ, hwy, colour = class) + 
  geom_point() + 
  geom_abline(aes(intercept = 0, slope = 5), color = "green") + 
  geom_hline(aes(yintercept = 30), color = "blue") + 
  geom_vline(aes(xintercept = 5), color = "red") ## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
ggplot(data = mpg) + 
  aes(displ, hwy) + 
  geom_point(aes(colour=class)) + 
  geom_smooth(aes(group=class), method="lm") ## `geom_smooth()` using formula = 'y ~ x'
ggplot(data = mpg) + 
  aes(displ, hwy, colour = class) + ## global aes will be applied to all higher level aes
  geom_point() + 
  geom_smooth(method="lm") ## `geom_smooth()` using formula = 'y ~ x'
ggplot(data = mpg) + 
  aes(displ, hwy, colour = class) + ## lower level aes will be applied to all higher level aes
  geom_point() + 
  geom_smooth(method="lm", se = F, size = 2) ## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `geom_smooth()` using formula = 'y ~ x'
mpgSummary <- mpg %>%
  group_by(class) %>%
  summarize(meanDispl = mean(displ), sdDispl = sd(displ)) ## sd is standard deviation, standard error se = sd/sqrt(n)
ggplot(data = mpgSummary) + 
  aes(x=class, y=meanDispl, fill=class) + 
  geom_bar(stat="identity",
           colour="black", # Use black outlines,
           size=.3)       # Thinner linesggplot(data = mpgSummary) + 
  aes(x=class, y=meanDispl, fill=class) + 
  geom_bar(stat="identity",
           colour="black", # Use black outlines,
           size=.3) +      # Thinner lines
  geom_errorbar(aes(ymin=meanDispl-sdDispl, ymax=meanDispl+sdDispl),
                size=.3,    # Thinner lines
                width=.2
                )## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(data = mpg) + 
  aes(x = hwy) + 
  geom_histogram(aes(fill = class)) + 
  facet_grid(. ~ class) ## or facet_grid(cols = vars(class))## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(data = mpg) + 
  aes(x = hwy) + 
  geom_histogram(aes(fill = class)) + 
  facet_grid(class ~ .) ## or facet_grid(rows = vars(class))## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(data = mpg) + 
  aes(x = hwy) + 
  geom_histogram(aes(fill = class)) + 
  facet_grid(drv ~ class) ## or facet_grid(rows = vars(drv), cols = vars(class))## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
##   Reaction Days Subject
## 1 249.5600    0     308
## 2 258.7047    1     308
## 3 250.8006    2     308
## 4 321.4398    3     308
## 5 356.8519    4     308
ggplot(data=sleepstudy) + 
  aes(x = Days, y=Reaction, colour = Subject) +
  geom_smooth(method="lm") + 
  facet_wrap(~Subject)## `geom_smooth()` using formula = 'y ~ x'
sleepSummary <- sleepstudy %>% 
  group_by(Days) %>%
  summarize(Mean = mean(Reaction), SD = sd(Reaction), SE = sd(Reaction)/sqrt(n()))
ggplot(data=sleepSummary) + 
  aes(x = Days, y=Mean) +
  geom_path() + 
  geom_errorbar(aes(ymin=Mean-SE, ymax=Mean+SE),
                  size=0.5,    # Thinner lines
                  width=.2) # Subset 10 rows
set.seed(32611)
ss <- sample(1:32, 10)
df <- mtcars[ss, ]
sp <- ggplot(data = df) +
  aes(wt, mpg, label = rownames(df)) +
  geom_point()
# Add texts
sp + geom_text() ## geom_text need the label aessp + geom_text(size=6)
sp +  geom_text(hjust=0, vjust=0)
sp + geom_text(aes(fontface=2))
sp + geom_text(family = "Times New Roman")
sp + geom_text(aes(color=factor(cyl)))
sp + geom_text(aes(size=wt))
sp <- ggplot(data = df) +
  aes(wt, mpg, label = rownames(df)) +
  geom_point()
# Add texts
sp + geom_label()p <- ggplot(mtcars, aes(wt, mpg)) +
  geom_point(color = 'red') 
p + geom_text(aes(label = rownames(mtcars)),
              size = 3.5)## Warning: ggrepel: 2 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
p <- ggplot(mpg) + 
  geom_point(aes(x = displ, y = hwy, colour=factor(cyl))) + 
  facet_wrap(~class)
p ## default theme_greyplot <- ggplot(mpg, aes(displ, hwy)) + geom_point()
plot + theme(
  panel.background = element_blank(),
  axis.text = element_blank()
)
plot + theme(
  axis.text = element_text(colour = "red", size = rel(1.5))
)
plot + theme(
  axis.line = element_line(arrow = arrow())
)
plot + theme(
  panel.background = element_rect(fill = "white"),
  plot.margin = margin(2, 2, 2, 2, "cm"),
  plot.background = element_rect(
    fill = "grey90",
    colour = "black",
    size = 1
  )
)
## all changes are relative to the default value
line
rect
text
title
aspect.ratio
axis.title
axis.title.x
axis.title.y 
axis.text
axis.text.x
axis.text.y
axis.ticks
axis.ticks.x
axis.ticks.y,
axis.ticks.length
axis.line
axis.line.x
axis.line.y
## for more options, see
?theme
theme_gray
ggplot(mpg) + 
  geom_point(aes(x = displ, y = hwy, colour=factor(cyl))) + 
  theme(legend.position = "none") black.bold.text <- element_text(face = "bold", color = "black", size=20)
ggplot(mpg, aes(displ, hwy, colour=class)) + geom_point() + 
    labs(title="hwy vs displ") + 
    theme_bw() + 
    theme(text = black.bold.text) black.bold.text <- element_text(face = "bold", color = "black", size=20)
ggplot(mpg, aes(displ, hwy, colour=class)) + geom_point() + 
    labs(title="hwy vs displ") + 
    theme_bw() + 
    theme(text = black.bold.text, panel.grid =element_blank()) black.bold.text <- element_text(face = "bold", color = "black", size=20)
red.italic.text <- element_text(face = "italic", color = "red", size=15)
ggplot(mpg, aes(displ, hwy, colour=class)) + geom_point() + 
    labs(title="hwy vs displ") + 
    theme_bw() + 
    theme(axis.text = black.bold.text , axis.title = black.bold.text, 
          legend.title = red.italic.text, 
          legend.text = black.bold.text) cols <- c("8" = "red", "4" = "blue", "6" = "darkgreen", "10" = "orange")
p + scale_colour_manual(values = cols)ggplot(mtcars) +
  aes(mpg, wt, colour = factor(cyl), fill = factor(cyl)) +
   geom_point() + 
  scale_colour_manual(
    values = cols,
    aesthetics = c("colour", "fill")
  )p <- ggplot(mtcars, aes(mpg, wt)) +
  geom_point(aes(colour = factor(cyl))) 
p + scale_x_continuous(breaks = c(15,25),
    labels = c("A", "B"),
    name = "My MPG") n <- 100
df <- data.frame(x = c(rnorm(n, 0, 3), rnorm(n, 0, 10)),
                 g = gl(2, n))
ggplot(df, aes(x, colour = g)) + stat_ecdf()n <- 100
set.seed(32611)
df <- data.frame(
  x = rnorm(n)
)
base <- ggplot(df, aes(x)) + geom_density()
base + stat_function(fun = dnorm, colour = "red") + xlim(c(-3,3))## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Setting the limits on a scale converts all values outside the range to NA.
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 136 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 136 rows containing missing values (`geom_point()`).