Zhiguang Huo (Caleb)
Thursday September 14, 2023
ggplot2 is based on the grammar of graphics, the idea that you can build every graph from the same few components:
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ tibble 3.1.8 ✔ dplyr 1.1.0
## ✔ tidyr 1.3.0 ✔ stringr 1.5.0
## ✔ readr 2.1.4 ✔ forcats 1.0.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## tibble [234 × 11] (S3: tbl_df/tbl/data.frame)
## $ manufacturer: chr [1:234] "audi" "audi" "audi" "audi" ...
## $ model : chr [1:234] "a4" "a4" "a4" "a4" ...
## $ displ : num [1:234] 1.8 1.8 2 2 2.8 2.8 3.1 1.8 1.8 2 ...
## $ year : int [1:234] 1999 1999 2008 2008 1999 1999 2008 1999 1999 2008 ...
## $ cyl : int [1:234] 4 4 4 4 6 6 6 4 4 4 ...
## $ trans : chr [1:234] "auto(l5)" "manual(m5)" "manual(m6)" "auto(av)" ...
## $ drv : chr [1:234] "f" "f" "f" "f" ...
## $ cty : int [1:234] 18 21 20 21 16 18 18 18 16 20 ...
## $ hwy : int [1:234] 29 29 31 30 26 26 27 26 25 28 ...
## $ fl : chr [1:234] "p" "p" "p" "p" ...
## $ class : chr [1:234] "compact" "compact" "compact" "compact" ...
## # A tibble: 6 × 11
## manufacturer model displ year cyl trans drv cty hwy fl class
## <chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int> <chr> <chr>
## 1 audi a4 1.8 1999 4 auto(l5) f 18 29 p compa…
## 2 audi a4 1.8 1999 4 manual(m5) f 21 29 p compa…
## 3 audi a4 2 2008 4 manual(m6) f 20 31 p compa…
## 4 audi a4 2 2008 4 auto(av) f 21 30 p compa…
## 5 audi a4 2.8 1999 6 auto(l5) f 16 26 p compa…
## 6 audi a4 2.8 1999 6 manual(m5) f 18 26 p compa…
## Does not give blue points: aes() maps a variable (in the data) to a colour,
## so "blue" is treated as a data value rather than as a colour name.
ggplot(data = mpg) +
  aes(x = displ, y = hwy, color = "blue") +
  geom_point()

## Wrap the value in I() to mark it as an absolute colour.
ggplot(data = mpg) +
  aes(x = displ, y = hwy, color = I("blue")) +
  geom_point()

## Or set the colour on the layer itself, outside aes().
ggplot(data = mpg) +
  aes(x = displ, y = hwy) +
  geom_point(color = "blue")
## Does not set the point size: aes() maps a variable (in the data) to a size,
## so "3" is treated as a data value.
ggplot(data = mpg) +
  aes(x = displ, y = hwy, size = "3") +
  geom_point()

## Wrap the value in I() to mark it as an absolute size.
ggplot(data = mpg) +
  aes(x = displ, y = hwy, size = I(3)) +
  geom_point()

## Or set the size on the layer itself, outside aes().
ggplot(data = mpg) +
  aes(x = displ, y = hwy) +
  geom_point(size = 3)
## Does not set the transparency: aes() maps a variable (in the data) to an
## alpha, so the constant 1 is treated as a data value.
ggplot(data = mpg) +
  aes(x = displ, y = hwy, alpha = 1) +
  geom_point()

## Wrap the value in I() to mark it as an absolute alpha.
ggplot(data = mpg) +
  aes(x = displ, y = hwy, alpha = I(0.5)) +
  geom_point()

## Or set the alpha on the layer itself, outside aes().
ggplot(data = mpg) +
  aes(x = displ, y = hwy) +
  geom_point(alpha = 0.5)
## Column names stored as strings: look them up with the `.data` pronoun
## inside aes(). This replaces aes_string(), which is deprecated since
## ggplot2 3.0.0.
xvariable <- "displ"
yvariable <- "hwy"
ggplot(data = mpg) +
  aes(x = .data[[xvariable]], y = .data[[yvariable]], color = class) +
  geom_point()
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## [1] "geom_abline" "geom_area" "geom_bar"
## [4] "geom_bin_2d" "geom_bin2d" "geom_blank"
## [7] "geom_boxplot" "geom_col" "geom_contour"
## [10] "geom_contour_filled" "geom_count" "geom_crossbar"
## [13] "geom_curve" "geom_density" "geom_density_2d"
## [16] "geom_density_2d_filled" "geom_density2d" "geom_density2d_filled"
## [19] "geom_dotplot" "geom_errorbar" "geom_errorbarh"
## [22] "geom_freqpoly" "geom_function" "geom_hex"
## [25] "geom_histogram" "geom_hline" "geom_jitter"
## [28] "geom_label" "geom_line" "geom_linerange"
## [31] "geom_map" "geom_path" "geom_point"
## [34] "geom_pointrange" "geom_polygon" "geom_qq"
## [37] "geom_qq_line" "geom_quantile" "geom_raster"
## [40] "geom_rect" "geom_ribbon" "geom_rug"
## [43] "geom_segment" "geom_sf" "geom_sf_label"
## [46] "geom_sf_text" "geom_smooth" "geom_spoke"
## [49] "geom_step" "geom_text" "geom_tile"
## [52] "geom_violin" "geom_vline"
## colour is mapped in the plot-level aes(), so it is a global aesthetic
## shared by the point layer and the line layer.
ggplot(data = mpg) +
  aes(x = displ, y = hwy, colour = class) +
  geom_point() +
  geom_line()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## colour is mapped only inside geom_point(); geom_smooth() gets its own
## layer-level mapping (group = class) and a linear-model fit.
ggplot(data = mpg) +
  aes(x = displ, y = hwy) +
  geom_point(mapping = aes(colour = class)) +
  geom_smooth(mapping = aes(group = class), method = "lm")
## `geom_smooth()` using formula = 'y ~ x'
## The plot-level (global) aes() is inherited by every layer added after it,
## here both the points and the linear-model smooth.
ggplot(data = mpg) +
  aes(x = displ, y = hwy, colour = class) +
  geom_point() +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'
## The plot-level (global) aes() is inherited by both layers.
## se = FALSE drops the confidence band; linewidth = 2 thickens the fitted
## lines (`linewidth` replaces `size` for lines since ggplot2 3.4.0).
ggplot(data = mpg) +
  aes(displ, hwy, colour = class) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, linewidth = 2)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `geom_smooth()` using formula = 'y ~ x'
## Per-class mean and standard deviation of displ.
## (sd is the standard deviation; the standard error is se = sd / sqrt(n).)
mpgSummary <- mpg %>%
  group_by(class) %>%
  summarize(meanDispl = mean(displ), sdDispl = sd(displ))

## The bar heights are already computed, so draw them as-is with geom_col()
## (equivalent to geom_bar(stat = "identity")).
ggplot(data = mpgSummary) +
  aes(x = class, y = meanDispl, fill = class) +
  geom_col(
    colour = "black", # Use black outlines,
    linewidth = .3    # Thinner lines (`linewidth` replaces `size` since ggplot2 3.4.0)
  )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## One histogram panel per class, laid out as columns.
## Formula equivalent: facet_grid(. ~ class)
ggplot(data = mpg) +
  aes(x = hwy) +
  geom_histogram(mapping = aes(fill = class)) +
  facet_grid(cols = vars(class))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## One histogram panel per class, laid out as rows.
## Formula equivalent: facet_grid(class ~ .)
ggplot(data = mpg) +
  aes(x = hwy) +
  geom_histogram(mapping = aes(fill = class)) +
  facet_grid(rows = vars(class))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Grid of histogram panels: drv down the rows, class across the columns.
## Formula equivalent: facet_grid(drv ~ class)
ggplot(data = mpg) +
  aes(x = hwy) +
  geom_histogram(mapping = aes(fill = class)) +
  facet_grid(rows = vars(drv), cols = vars(class))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
## Reaction Days Subject
## 1 249.5600 0 308
## 2 258.7047 1 308
## 3 250.8006 2 308
## 4 321.4398 3 308
## 5 356.8519 4 308
## Linear fit of Reaction against Days, one wrapped panel per Subject.
## Formula equivalent: facet_wrap(~Subject)
ggplot(data = sleepstudy) +
  aes(x = Days, y = Reaction, colour = Subject) +
  geom_smooth(method = "lm") +
  facet_wrap(vars(Subject))
## `geom_smooth()` using formula = 'y ~ x'
## Variations on the text layer; `sp` is the base plot defined earlier.
## Constants are set as layer arguments; only data variables go inside aes().
sp + geom_text(size = 6)                     # fixed label size
sp + geom_text(hjust = 0, vjust = 0)         # label justification
sp + geom_text(fontface = 2)                 # constant font face, set outside aes()
sp + geom_text(family = "Times New Roman")   # font family
sp + geom_text(aes(color = factor(cyl)))     # label colour mapped to cyl
sp + geom_text(aes(size = wt))               # label size mapped to wt
## Warning: ggrepel: 2 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Base scatter plot that the theme() examples are added to.
plot <- ggplot(mpg, aes(displ, hwy)) +
  geom_point()

## element_blank(): remove the panel background and the axis text.
plot +
  theme(
    axis.text = element_blank(),
    panel.background = element_blank()
  )

## element_text(): red axis text, 1.5 times the inherited size.
plot +
  theme(axis.text = element_text(size = rel(1.5), colour = "red"))

## element_line(): put arrow heads on the axis lines.
plot +
  theme(axis.line = element_line(arrow = arrow()))
## element_rect(): white panel on a grey, black-bordered plot background,
## with a 2 cm margin on every side.
plot + theme(
  panel.background = element_rect(fill = "white"),
  plot.margin = margin(2, 2, 2, 2, "cm"),
  plot.background = element_rect(
    fill = "grey90",
    colour = "black",
    linewidth = 1 # border width; `linewidth` replaces `size` since ggplot2 3.4.0
  )
)
## all changes are relative to the default value
line
rect
text
title
aspect.ratio
axis.title
axis.title.x
axis.title.y
axis.text
axis.text.x
axis.text.y
axis.ticks
axis.ticks.x
axis.ticks.y
axis.ticks.length
axis.line
axis.line.x
axis.line.y
## for more options, see
?theme
theme_gray
## Text style reused in the theme below: bold, black, size 20.
black.bold.text <- element_text(size = 20, face = "bold", color = "black")

## Apply theme_bw() first, then override its `text` element.
ggplot(mpg, aes(displ, hwy, colour = class)) +
  geom_point() +
  labs(title = "hwy vs displ") +
  theme_bw() +
  theme(text = black.bold.text)
## Two text styles: bold black (size 20) and italic red (size 15).
black.bold.text <- element_text(size = 20, face = "bold", color = "black")
red.italic.text <- element_text(size = 15, face = "italic", color = "red")

## Style individual theme elements instead of the global `text` element:
## the legend title is italic red, everything else listed is bold black.
ggplot(mpg, aes(displ, hwy, colour = class)) +
  geom_point() +
  labs(title = "hwy vs displ") +
  theme_bw() +
  theme(
    axis.title = black.bold.text,
    axis.text = black.bold.text,
    legend.text = black.bold.text,
    legend.title = red.italic.text
  )
## Scatter plot of wt against mpg, coloured by number of cylinders.
p <- ggplot(mtcars, aes(mpg, wt)) +
  geom_point(aes(colour = factor(cyl)))

## Customise the x scale: axis title, tick positions and tick labels.
p +
  scale_x_continuous(
    name = "My MPG",
    breaks = c(15, 25),
    labels = c("A", "B")
  )
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Setting the limits on a scale converts all values outside the range to NA.
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 136 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 136 rows containing missing values (`geom_point()`).