Zhiguang Huo (Caleb)
Monday September 13, 2021
ggplot2 is based on the grammar of graphics, the idea that you can build every graph from the same few components:
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ tibble 3.0.4 ✓ dplyr 1.0.2
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.0
## ✓ purrr 0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## tibble [234 × 11] (S3: tbl_df/tbl/data.frame)
## $ manufacturer: chr [1:234] "audi" "audi" "audi" "audi" ...
## $ model : chr [1:234] "a4" "a4" "a4" "a4" ...
## $ displ : num [1:234] 1.8 1.8 2 2 2.8 2.8 3.1 1.8 1.8 2 ...
## $ year : int [1:234] 1999 1999 2008 2008 1999 1999 2008 1999 1999 2008 ...
## $ cyl : int [1:234] 4 4 4 4 6 6 6 4 4 4 ...
## $ trans : chr [1:234] "auto(l5)" "manual(m5)" "manual(m6)" "auto(av)" ...
## $ drv : chr [1:234] "f" "f" "f" "f" ...
## $ cty : int [1:234] 18 21 20 21 16 18 18 18 16 20 ...
## $ hwy : int [1:234] 29 29 31 30 26 26 27 26 25 28 ...
## $ fl : chr [1:234] "p" "p" "p" "p" ...
## $ class : chr [1:234] "compact" "compact" "compact" "compact" ...
## # A tibble: 6 x 11
## manufacturer model displ year cyl trans drv cty hwy fl class
## <chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int> <chr> <chr>
## 1 audi a4 1.8 1999 4 auto(l5) f 18 29 p compa…
## 2 audi a4 1.8 1999 4 manual(m5) f 21 29 p compa…
## 3 audi a4 2 2008 4 manual(m6) f 20 31 p compa…
## 4 audi a4 2 2008 4 auto(av) f 21 30 p compa…
## 5 audi a4 2.8 1999 6 auto(l5) f 16 26 p compa…
## 6 audi a4 2.8 1999 6 manual(m5) f 18 26 p compa…
## Mapping vs. setting colour on the displ/hwy scatter plot.
## Inside aes() the string "blue" is treated as a data value: it is mapped
## through the colour scale (with a legend), so the points are not drawn blue.
ggplot(mpg, aes(x = displ, y = hwy, colour = "blue")) +
  geom_point()

## I() marks the value "as is", so "blue" is used as the literal colour.
ggplot(mpg, aes(x = displ, y = hwy, colour = I("blue"))) +
  geom_point()

## Or set the colour as a fixed layer parameter, outside aes().
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(colour = "blue")
## Mapping vs. setting point size on the displ/hwy scatter plot.
## Inside aes() the string "3" is treated as a data value and sent through
## the size scale; it does not set the point size to 3.
ggplot(mpg, aes(x = displ, y = hwy, size = "3")) +
  geom_point()

## I() passes the number through unchanged, giving an absolute size of 3.
ggplot(mpg, aes(x = displ, y = hwy, size = I(3))) +
  geom_point()

## Or set the size as a fixed layer parameter, outside aes().
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(size = 3)

## rel() builds a relative size.
## NOTE(review): rel() is mainly meant for theme elements; inside a geom it
## likely behaves like the plain number 3 -- confirm.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(size = rel(3))
## Mapping vs. setting transparency on the displ/hwy scatter plot.
## Inside aes() the constant 1 is treated as a data value and sent through
## the alpha scale (with a legend); it does not set alpha to 1 literally.
ggplot(mpg, aes(x = displ, y = hwy, alpha = 1)) +
  geom_point()

## I() passes the number through unchanged, giving an absolute alpha of 0.5.
ggplot(mpg, aes(x = displ, y = hwy, alpha = I(0.5))) +
  geom_point()

## Or set alpha as a fixed layer parameter, outside aes().
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(alpha = 0.5)
## [1] "geom_abline" "geom_area" "geom_bar"
## [4] "geom_bin2d" "geom_blank" "geom_boxplot"
## [7] "geom_col" "geom_contour" "geom_contour_filled"
## [10] "geom_count" "geom_crossbar" "geom_curve"
## [13] "geom_density" "geom_density_2d" "geom_density_2d_filled"
## [16] "geom_density2d" "geom_density2d_filled" "geom_dotplot"
## [19] "geom_errorbar" "geom_errorbarh" "geom_freqpoly"
## [22] "geom_function" "geom_hex" "geom_histogram"
## [25] "geom_hline" "geom_jitter" "geom_label"
## [28] "geom_line" "geom_linerange" "geom_map"
## [31] "geom_path" "geom_point" "geom_pointrange"
## [34] "geom_polygon" "geom_qq" "geom_qq_line"
## [37] "geom_quantile" "geom_raster" "geom_rect"
## [40] "geom_ribbon" "geom_rug" "geom_segment"
## [43] "geom_sf" "geom_sf_label" "geom_sf_text"
## [46] "geom_smooth" "geom_spoke" "geom_step"
## [49] "geom_text" "geom_tile" "geom_violin"
## [52] "geom_vline"
## colour = class is set at plot level (a global aesthetic), so it applies
## to both the point layer and the line layer.
ggplot(mpg, aes(x = displ, y = hwy, colour = class)) +
  geom_point() +
  geom_line()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## Only x and y are global. Colour is mapped in the point layer alone, while
## the smooth layer is grouped by class to get one linear fit per class.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(colour = class)) +
  geom_smooth(mapping = aes(group = class), method = "lm")
## `geom_smooth()` using formula 'y ~ x'
## Aesthetics set at plot level are inherited by every layer, so both the
## points and the linear fits are coloured by class.
ggplot(mpg, aes(x = displ, y = hwy, colour = class)) +
  geom_point() +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'
## Plot-level aesthetics (including colour = class) are inherited by every
## layer. se = FALSE hides the confidence band around each linear fit and
## size = 2 draws thicker fit lines.
ggplot(data = mpg) +
  aes(displ, hwy, colour = class) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, size = 2) ## spell out FALSE: `F` is an ordinary variable that can be reassigned
## `geom_smooth()` using formula 'y ~ x'
## Per-class mean and standard deviation of displ.
## (sd is the standard deviation; the standard error would be sd / sqrt(n).)
mpgSummary <- summarize(
  group_by(mpg, class),
  meanDispl = mean(displ),
  sdDispl = sd(displ)
)
## `summarise()` ungrouping output (override with `.groups` argument)
## Bar chart of mean displ per class, with error bars spanning
## mean - SD to mean + SD.
ggplot(mpgSummary, aes(x = class, y = meanDispl, fill = class)) +
  geom_bar(
    stat = "identity",              # bar height is the value in meanDispl
    position = position_dodge(),
    colour = "black",               # black bar outlines
    size = .3                       # thin outlines
  ) +
  geom_errorbar(
    mapping = aes(ymin = meanDispl - sdDispl, ymax = meanDispl + sdDispl),
    width = .2,
    size = .3,                      # thin error-bar lines
    position = position_dodge(.9)
  )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Histogram of hwy, one panel per class, laid out as columns.
ggplot(mpg, aes(x = hwy)) +
  geom_histogram(mapping = aes(fill = class)) +
  facet_grid(cols = vars(class)) ## or facet_grid(. ~ class)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Histogram of hwy, one panel per class, laid out as rows.
ggplot(mpg, aes(x = hwy)) +
  geom_histogram(mapping = aes(fill = class)) +
  facet_grid(rows = vars(class)) ## or facet_grid(class ~ .)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Histogram of hwy in a grid of panels: drv down the rows, class across
## the columns.
ggplot(mpg, aes(x = hwy)) +
  geom_histogram(mapping = aes(fill = class)) +
  facet_grid(rows = vars(drv), cols = vars(class)) ## or facet_grid(drv ~ class)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
## Reaction Days Subject
## 1 249.5600 0 308
## 2 258.7047 1 308
## 3 250.8006 2 308
## 4 321.4398 3 308
## 5 356.8519 4 308
## Linear fit of Reaction against Days, drawn in a separate panel (and
## colour) for each Subject.
ggplot(sleepstudy, aes(x = Days, y = Reaction, colour = Subject)) +
  geom_smooth(method = "lm") +
  facet_wrap(vars(Subject)) ## or facet_wrap(~Subject)
## `geom_smooth()` using formula 'y ~ x'
## Per-day mean, standard deviation and standard error (sd / sqrt(n)) of
## Reaction.
sleepSummary <- summarize(
  group_by(sleepstudy, Days),
  Mean = mean(Reaction),
  SD = sd(Reaction),
  SE = sd(Reaction) / sqrt(n())
)
## `summarise()` ungrouping output (override with `.groups` argument)
## Variations on a text layer added to `sp`.
## NOTE(review): `sp` is defined outside this excerpt; presumably a ggplot
## object that already maps a `label` aesthetic -- confirm.
sp + geom_text(size = 6)                              ## fixed text size
sp + geom_text(vjust = 0, hjust = 0)                  ## anchor labels at their bottom-left corner
sp + geom_text(mapping = aes(fontface = 2))           ## font face 2 (bold in R graphics)
sp + geom_text(family = "Times New Roman")            ## fixed font family
sp + geom_text(mapping = aes(colour = factor(cyl)))   ## text colour mapped to cyl
sp + geom_text(mapping = aes(size = wt))              ## text size mapped to wt
## Warning: ggrepel: 2 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Base scatter plot reused by the theme() examples that follow.
plot <- ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point()

## Remove the panel background and the axis text.
plot +
  theme(axis.text = element_blank(), panel.background = element_blank())

## Red axis text at 1.5 times the inherited size.
plot +
  theme(axis.text = element_text(size = rel(1.5), colour = "red"))

## Axis lines ending in arrows.
plot +
  theme(axis.line = element_line(arrow = arrow()))

## White panel, 2 cm margin on every side, and a grey plot background
## with a black border.
plot +
  theme(
    plot.background = element_rect(colour = "black", fill = "grey90", size = 1),
    plot.margin = margin(t = 2, r = 2, b = 2, l = 2, unit = "cm"),
    panel.background = element_rect(fill = "white")
  )
## all changes are relative to the default value
line
rect
text
title
aspect.ratio
axis.title
axis.title.x
axis.title.y
axis.text
axis.text.x
axis.text.y
axis.ticks
axis.ticks.x
axis.ticks.y
axis.ticks.length
axis.line
axis.line.x
axis.line.y
## for more options, see
?theme
theme_gray
## One text specification applied to the root `text` theme element.
black.bold.text <- element_text(size = 20, colour = "black", face = "bold")
ggplot(mpg, aes(x = displ, y = hwy, colour = class)) +
  geom_point() +
  labs(title = "hwy vs displ") +
  theme_bw() +
  theme(text = black.bold.text)
## Two text specifications applied to individual theme elements:
## bold black for axis text, axis titles and legend text; italic red for
## the legend title.
black.bold.text <- element_text(size = 20, colour = "black", face = "bold")
red.italic.text <- element_text(size = 15, colour = "red", face = "italic")
ggplot(mpg, aes(x = displ, y = hwy, colour = class)) +
  geom_point() +
  labs(title = "hwy vs displ") +
  theme_bw() +
  theme(
    axis.text = black.bold.text,
    axis.title = black.bold.text,
    legend.title = red.italic.text,
    legend.text = black.bold.text
  )
## Scatter plot of wt against mpg, coloured by number of cylinders.
p <- ggplot(mtcars, aes(x = mpg, y = wt)) +
  geom_point(mapping = aes(colour = factor(cyl)))

## Customise the x axis: a new title, breaks at 15 and 25 only, and those
## breaks labelled "A" and "B".
p + scale_x_continuous(
  name = "My MPG",
  breaks = c(15, 25),
  labels = c("A", "B")
)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
Setting the limits on a scale converts all values outside the range to NA.
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 136 rows containing non-finite values (stat_smooth).
## Warning: Removed 136 rows containing missing values (geom_point).