Zhiguang Huo (Caleb)
Monday September 19, 2022
ggplot2 is based on the grammar of graphics, the idea that you can build every graph from the same few components: a data set, a set of geoms (visual marks that represent data points), and a coordinate system.
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──## ✓ tibble  3.0.4     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.0
## ✓ purrr   0.3.4## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()## tibble [234 × 11] (S3: tbl_df/tbl/data.frame)
##  $ manufacturer: chr [1:234] "audi" "audi" "audi" "audi" ...
##  $ model       : chr [1:234] "a4" "a4" "a4" "a4" ...
##  $ displ       : num [1:234] 1.8 1.8 2 2 2.8 2.8 3.1 1.8 1.8 2 ...
##  $ year        : int [1:234] 1999 1999 2008 2008 1999 1999 2008 1999 1999 2008 ...
##  $ cyl         : int [1:234] 4 4 4 4 6 6 6 4 4 4 ...
##  $ trans       : chr [1:234] "auto(l5)" "manual(m5)" "manual(m6)" "auto(av)" ...
##  $ drv         : chr [1:234] "f" "f" "f" "f" ...
##  $ cty         : int [1:234] 18 21 20 21 16 18 18 18 16 20 ...
##  $ hwy         : int [1:234] 29 29 31 30 26 26 27 26 25 28 ...
##  $ fl          : chr [1:234] "p" "p" "p" "p" ...
##  $ class       : chr [1:234] "compact" "compact" "compact" "compact" ...## # A tibble: 6 x 11
##   manufacturer model displ  year   cyl trans      drv     cty   hwy fl    class 
##   <chr>        <chr> <dbl> <int> <int> <chr>      <chr> <int> <int> <chr> <chr> 
## 1 audi         a4      1.8  1999     4 auto(l5)   f        18    29 p     compa…
## 2 audi         a4      1.8  1999     4 manual(m5) f        21    29 p     compa…
## 3 audi         a4      2    2008     4 manual(m6) f        20    31 p     compa…
## 4 audi         a4      2    2008     4 auto(av)   f        21    30 p     compa…
## 5 audi         a4      2.8  1999     6 auto(l5)   f        16    26 p     compa…
## 6 audi         a4      2.8  1999     6 manual(m5) f        18    26 p     compa…ggplot(data = mpg) + 
  aes(x=displ, y=hwy, color = "blue") +
  geom_point() ## Doesn't work, aes only maps a variable (in the data) to a color.
ggplot(data = mpg) + 
  aes(x=displ, y=hwy, color = I("blue")) +
  geom_point() ## use I to indicate absolute color
ggplot(data = mpg) + 
  aes(x=displ, y=hwy ) +
  geom_point(color = "blue") ## or use the color here (outside aes)ggplot(data = mpg) + 
  aes(x=displ, y=hwy, size = "3") +
  geom_point() ## Doesn't work, aes only maps a variable (in the data) to a size
ggplot(data = mpg) + 
  aes(x=displ, y=hwy, size = I(3)) +
  geom_point() ## use I to indicate absolute size
ggplot(data = mpg) + 
  aes(x=displ, y=hwy ) +
  geom_point(size = 3) ## or use the size hereggplot(data = mpg) + 
  aes(x=displ, y=hwy, alpha = 1) +
  geom_point() ## Doesn't work, aes only maps a variable (in the data) to a alpha
ggplot(data = mpg) + 
  aes(x=displ, y=hwy, alpha = I(0.5)) +
  geom_point() ## use I to indicate absolute alpha
ggplot(data = mpg) + 
  aes(x=displ, y=hwy ) +
  geom_point(alpha = 0.5) ## or use the alpha herempg_sub <- subset(mpg, class!="suv")
ggplot(data = mpg_sub) + 
  aes(x=displ, y=hwy, shape = class) +
  geom_point()xvariable = "displ"
yvariable = "hwy"
ggplot(data = mpg) + 
  aes_string(x=xvariable, y=yvariable, color = "class") +
  geom_point()##  [1] "geom_abline"            "geom_area"              "geom_bar"              
##  [4] "geom_bin2d"             "geom_blank"             "geom_boxplot"          
##  [7] "geom_col"               "geom_contour"           "geom_contour_filled"   
## [10] "geom_count"             "geom_crossbar"          "geom_curve"            
## [13] "geom_density"           "geom_density_2d"        "geom_density_2d_filled"
## [16] "geom_density2d"         "geom_density2d_filled"  "geom_dotplot"          
## [19] "geom_errorbar"          "geom_errorbarh"         "geom_freqpoly"         
## [22] "geom_function"          "geom_hex"               "geom_histogram"        
## [25] "geom_hline"             "geom_jitter"            "geom_label"            
## [28] "geom_line"              "geom_linerange"         "geom_map"              
## [31] "geom_path"              "geom_point"             "geom_pointrange"       
## [34] "geom_polygon"           "geom_qq"                "geom_qq_line"          
## [37] "geom_quantile"          "geom_raster"            "geom_rect"             
## [40] "geom_ribbon"            "geom_rug"               "geom_segment"          
## [43] "geom_sf"                "geom_sf_label"          "geom_sf_text"          
## [46] "geom_smooth"            "geom_spoke"             "geom_step"             
## [49] "geom_text"              "geom_tile"              "geom_violin"           
## [52] "geom_vline"ggplot(data = mpg) + 
  aes(displ, hwy, colour=class) + ## this is global color
  geom_point() + 
  geom_line()ggplot(data = mpg) + 
  aes(displ, hwy, colour = class) + 
  geom_point() + 
  geom_abline(aes(intercept = 0, slope = 5), color = "green") + 
  geom_hline(aes(yintercept = 30), color = "blue") + 
  geom_vline(aes(xintercept = 5), color = "red") ## `geom_smooth()` using method = 'loess' and formula 'y ~ x'## `geom_smooth()` using formula 'y ~ x'ggplot(data = mpg) + 
  aes(displ, hwy) + 
  geom_point(aes(colour=class)) + 
  geom_smooth(aes(group=class), method="lm") ## `geom_smooth()` using formula 'y ~ x'ggplot(data = mpg) + 
  aes(displ, hwy, colour = class) + ## global aes will be applied to all higher level aes
  geom_point() + 
  geom_smooth(method="lm") ## `geom_smooth()` using formula 'y ~ x'ggplot(data = mpg) + 
  aes(displ, hwy, colour = class) + ## lower level aes will be applied to all higher level aes
  geom_point() + 
  geom_smooth(method="lm", se = F, size = 2) ## `geom_smooth()` using formula 'y ~ x'mpgSummary <- mpg %>%
  group_by(class) %>%
  summarize(meanDispl = mean(displ), sdDispl = sd(displ)) ## sd is standard deviation, standard error se = sd/sqrt(n)## `summarise()` ungrouping output (override with `.groups` argument)ggplot(data = mpgSummary) + 
  aes(x=class, y=meanDispl, fill=class) + 
  geom_bar(stat="identity",
           colour="black", # Use black outlines,
           size=.3)       # Thinner linesggplot(data = mpgSummary) + 
  aes(x=class, y=meanDispl, fill=class) + 
  geom_bar(stat="identity",
           colour="black", # Use black outlines,
           size=.3) +      # Thinner lines
  geom_errorbar(aes(ymin=meanDispl-sdDispl, ymax=meanDispl+sdDispl),
                size=.3,    # Thinner lines
                width=.2
                )## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.ggplot(data = mpg) + 
  aes(x = hwy) + 
  geom_histogram(aes(fill = class)) + 
  facet_grid(. ~ class) ## or facet_grid(cols = vars(class))## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.ggplot(data = mpg) + 
  aes(x = hwy) + 
  geom_histogram(aes(fill = class)) + 
  facet_grid(class ~ .) ## or facet_grid(rows = vars(class))## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.ggplot(data = mpg) + 
  aes(x = hwy) + 
  geom_histogram(aes(fill = class)) + 
  facet_grid(drv ~ class) ## or facet_grid(rows = vars(drv), cols = vars(class))## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.## Loading required package: Matrix## 
## Attaching package: 'Matrix'## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack##   Reaction Days Subject
## 1 249.5600    0     308
## 2 258.7047    1     308
## 3 250.8006    2     308
## 4 321.4398    3     308
## 5 356.8519    4     308ggplot(data=sleepstudy) + 
  aes(x = Days, y=Reaction, colour = Subject) +
  geom_smooth(method="lm") + 
  facet_wrap(~Subject)## `geom_smooth()` using formula 'y ~ x'sleepSummary <- sleepstudy %>% 
  group_by(Days) %>%
  summarize(Mean = mean(Reaction), SD = sd(Reaction), SE = sd(Reaction)/sqrt(n()))## `summarise()` ungrouping output (override with `.groups` argument)ggplot(data=sleepSummary) + 
  aes(x = Days, y=Mean) +
  geom_path() + 
  geom_errorbar(aes(ymin=Mean-SE, ymax=Mean+SE),
                  size=0.5,    # Thinner lines
                  width=.2) # Subset 10 rows
## Fix the RNG seed so the same 10 cars are drawn on every run.
set.seed(32611)
## Sample row indices from however many rows mtcars has, rather than
## hard-coding its row count; with the same seed this yields the same draw.
ss <- sample(seq_len(nrow(mtcars)), 10)
## Keep only the sampled rows; row names (car models) are preserved.
df <- mtcars[ss, ]
sp <- ggplot(data = df) +
  aes(wt, mpg, label = rownames(df)) +
  geom_point()
# Add texts
sp + geom_text() ## geom_text need the label aessp + geom_text(size=6)sp +  geom_text(hjust=0, vjust=0)sp + geom_text(aes(fontface=2))sp + geom_text(family = "Times New Roman")sp + geom_text(aes(color=factor(cyl)))sp + geom_text(aes(size=wt))sp <- ggplot(data = df) +
  aes(wt, mpg, label = rownames(df)) +
  geom_point()
# Add texts
sp + geom_label()p <- ggplot(mtcars, aes(wt, mpg)) +
  geom_point(color = 'red') 
p + geom_text(aes(label = rownames(mtcars)),
              size = 3.5)## Warning: ggrepel: 2 unlabeled data points (too many overlaps). Consider
## increasing max.overlapsp <- ggplot(mpg) + 
  geom_point(aes(x = displ, y = hwy, colour=factor(cyl))) + 
  facet_wrap(~class)
p ## default theme_greyplot <- ggplot(mpg, aes(displ, hwy)) + geom_point()
## Remove the panel background and hide the axis tick labels.
plot +
  theme(panel.background = element_blank(), axis.text = element_blank())

## Draw the axis tick labels in red, scaled to 1.5 times the parent text size.
plot +
  theme(axis.text = element_text(size = rel(1.5), color = "red"))

## Add axis lines that end in arrowheads.
plot +
  theme(axis.line = element_line(arrow = arrow()))

## White panel on a grey, black-bordered plot area with 2 cm margins all round.
plot +
  theme(
    panel.background = element_rect(fill = "white"),
    plot.margin = margin(t = 2, r = 2, b = 2, l = 2, unit = "cm"),
    plot.background = element_rect(fill = "grey90", color = "black", size = 1)
  )
## all changes are relative to the default value
line
rect
text
title
aspect.ratio
axis.title
axis.title.x
axis.title.y 
axis.text
axis.text.x
axis.text.y
axis.ticks
axis.ticks.x
axis.ticks.y
axis.ticks.length
axis.line
axis.line.x
axis.line.y
## for more options, see
?themetheme_grayggplot(mpg) + 
  geom_point(aes(x = displ, y = hwy, colour=factor(cyl))) + 
  theme(legend.position = "none") black.bold.text <- element_text(face = "bold", color = "black", size=20)
ggplot(mpg, aes(displ, hwy, colour=class)) + geom_point() + 
    labs(title="hwy vs displ") + 
    theme_bw() + 
    theme(text = black.bold.text) black.bold.text <- element_text(face = "bold", color = "black", size=20)
ggplot(mpg, aes(displ, hwy, colour=class)) + geom_point() + 
    labs(title="hwy vs displ") + 
    theme_bw() + 
    theme(text = black.bold.text, panel.grid =element_blank()) black.bold.text <- element_text(face = "bold", color = "black", size=20)
red.italic.text <- element_text(face = "italic", color = "red", size=15)
ggplot(mpg, aes(displ, hwy, colour=class)) + geom_point() + 
    labs(title="hwy vs displ") + 
    theme_bw() + 
    theme(axis.text = black.bold.text , axis.title = black.bold.text, 
          legend.title = red.italic.text, 
          legend.text = black.bold.text) cols <- c("8" = "red", "4" = "blue", "6" = "darkgreen", "10" = "orange")
p + scale_colour_manual(values = cols)ggplot(mtcars) +
  aes(mpg, wt, colour = factor(cyl), fill = factor(cyl)) +
   geom_point() + 
  scale_colour_manual(
    values = cols,
    aesthetics = c("colour", "fill")
  )p <- ggplot(mtcars, aes(mpg, wt)) +
  geom_point(aes(colour = factor(cyl))) 
p + scale_x_continuous(breaks = c(15,25),
    labels = c("A", "B"),
    name = "My MPG") n <- 100
## Seed the RNG so the simulated sample (and the ECDF plot built from it) is
## reproducible, matching the other randomised examples in this file.
set.seed(32611)
## Two groups of n draws each, N(0, sd = 3) and N(0, sd = 10); g labels the group.
df <- data.frame(
  x = c(rnorm(n, mean = 0, sd = 3), rnorm(n, mean = 0, sd = 10)),
  g = gl(2, n)
)
ggplot(df, aes(x, colour = g)) + stat_ecdf()n <- 100
set.seed(32611)
df <- data.frame(
  x = rnorm(n)
)
base <- ggplot(df, aes(x)) + geom_density()
base + stat_function(fun = dnorm, colour = "red") + xlim(c(-3,3))## `geom_smooth()` using method = 'loess' and formula 'y ~ x'## `geom_smooth()` using method = 'loess' and formula 'y ~ x'Setting the limits on a scale converts all values outside the range to NA.
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'## Warning: Removed 136 rows containing non-finite values (stat_smooth).## Warning: Removed 136 rows containing missing values (geom_point).