Zhiguang Huo (Caleb)
Monday September 17, 2018
ggplot2 is based on the grammar of graphics, the idea that you can build every graph from the same few components:
ggplot2 cheatsheet: https://www.rstudio.com/wp-content/uploads/2015/03/ggplot2-cheatsheet.pdf
Compared to qplot(), it’s easier to use multiple datasets in ggplot().
## Drop the "suv" class first: the shape scale supports at most 6 distinct
## shapes, so the remaining classes can each get their own symbol.
mpg_sub <- subset(mpg, class != "suv")

## Scatter plot of highway mileage against displacement, one shape per class.
ggplot(
  data = mpg_sub,
  mapping = aes(x = displ, y = hwy, shape = class)
) +
  geom_point()
## [1] "geom_abline" "geom_area" "geom_bar"
## [4] "geom_bin2d" "geom_blank" "geom_boxplot"
## [7] "geom_col" "geom_contour" "geom_count"
## [10] "geom_crossbar" "geom_curve" "geom_density"
## [13] "geom_density_2d" "geom_density2d" "geom_dotplot"
## [16] "geom_errorbar" "geom_errorbarh" "geom_freqpoly"
## [19] "geom_hex" "geom_histogram" "geom_hline"
## [22] "geom_jitter" "geom_label" "geom_line"
## [25] "geom_linerange" "geom_map" "geom_path"
## [28] "geom_point" "geom_pointrange" "geom_polygon"
## [31] "geom_qq" "geom_qq_line" "geom_quantile"
## [34] "geom_raster" "geom_rect" "geom_ribbon"
## [37] "geom_rug" "geom_segment" "geom_sf"
## [40] "geom_smooth" "geom_spoke" "geom_step"
## [43] "geom_text" "geom_tile" "geom_violin"
## [46] "geom_vline"
## Points sized by cylinder count, with one connecting line per class.
## Colour is mapped at the plot level, so both layers use it.
ggplot(
  data = mpg,
  mapping = aes(x = displ, y = hwy, colour = class)
) +
  geom_point(mapping = aes(size = cyl)) +
  geom_line(mapping = aes(group = class))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Colour is mapped only in the point layer; the smoother is merely grouped
## by class, so one linear fit is drawn per class without inheriting colour.
ggplot(
  data = mpg,
  mapping = aes(x = displ, y = hwy)
) +
  geom_point(mapping = aes(colour = class)) +
  geom_smooth(mapping = aes(group = class), method = "lm")
## Aesthetics declared at the plot level are inherited by every layer added
## afterwards, so both the points and the linear fits are coloured by class.
ggplot(
  data = mpg,
  mapping = aes(x = displ, y = hwy, colour = class)
) +
  geom_point() +
  geom_smooth(method = "lm")
## Same plot as before, but with the confidence ribbon switched off and a
## thicker fitted line. Aesthetics declared at the plot level are inherited
## by every layer, so points and fits are both coloured by class.
ggplot(data = mpg) +
  aes(displ, hwy, colour = class) +
  geom_point() +
  ## Spell out FALSE: `F` is an ordinary variable that can be reassigned.
  geom_smooth(method = "lm", se = FALSE, size = 2)
## Per-class summary of engine displacement: class label, mean and
## standard deviation, one row per class.
mpgSummary <- with(
  mpg,
  data.frame(
    class = tapply(class, class, unique),
    meanDispl = tapply(displ, class, mean),
    sdDispl = tapply(displ, class, sd)
  )
)

## Bar chart of the class means with +/- one standard deviation error bars.
ggplot(
  data = mpgSummary,
  mapping = aes(x = class, y = meanDispl, fill = class)
) +
  geom_bar(
    stat = "identity",          # bar heights are the values in meanDispl
    position = position_dodge(),
    colour = "black",           # black outlines
    size = 0.3                  # thin outlines
  ) +
  geom_errorbar(
    mapping = aes(ymin = meanDispl - sdDispl, ymax = meanDispl + sdDispl),
    position = position_dodge(0.9),
    width = 0.2,
    size = 0.3                  # thin error-bar lines
  )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Loading required package: Matrix
## Reaction Days Subject
## 1 249.5600 0 308
## 2 258.7047 1 308
## 3 250.8006 2 308
## 4 321.4398 3 308
## 5 356.8519 4 308
## One panel per subject, each showing a linear fit of reaction time on days.
ggplot(
  data = sleepstudy,
  mapping = aes(x = Days, y = Reaction, colour = Subject)
) +
  geom_smooth(method = "lm") +
  facet_wrap(facets = ~Subject)
## Per-day summary of reaction time across subjects: day, mean and
## standard deviation, one row per day.
sleepSummary <- data.frame(
  Days = with(sleepstudy, tapply(Days, Days, unique)),
  Mean = with(sleepstudy, tapply(Reaction, Days, mean)),
  SD = with(sleepstudy, tapply(Reaction, Days, sd))
)

## Mean reaction time over days, with +/- one standard deviation error bars.
ggplot(
  data = sleepSummary,
  mapping = aes(x = Days, y = Mean)
) +
  geom_path() +
  geom_errorbar(
    mapping = aes(ymin = Mean - SD, ymax = Mean + SD),
    width = 0.2,
    size = 0.5                  # thin error-bar lines
  )
## Base scatter plot whose points are labelled with the row names of `df`.
## NOTE(review): `df` is defined outside this excerpt; it is assumed to have
## `wt`, `mpg` and `cyl` columns -- confirm against the earlier chunk.
sp <- ggplot(
  data = df,
  mapping = aes(x = wt, y = mpg, label = rownames(df))
) +
  geom_point()

## Text label variations
sp + geom_text()                                 # defaults
sp + geom_text(size = 6)                         # larger text
sp + geom_text(hjust = 0, vjust = 0)             # shift the label anchor
sp + geom_text(mapping = aes(fontface = 2))      # font face 2
sp + geom_text(family = "Times New Roman")       # different font family
sp + geom_text(mapping = aes(color = factor(cyl)))  # colour by cylinder count
sp + geom_text(mapping = aes(size = wt))         # text size by weight
## Rebuild the labelled scatter plot, then draw the labels with geom_label()
## instead of geom_text().
## NOTE(review): `df` is defined outside this excerpt -- confirm its columns.
sp <- ggplot(
  data = df,
  mapping = aes(x = wt, y = mpg, label = rownames(df))
) +
  geom_point()

## Add labels
sp + geom_label()
## Red scatter plot of mpg against weight for mtcars.
p <- ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point(colour = "red")

## Annotate each point with the car name (the row names of mtcars).
p + geom_text(
  mapping = aes(label = rownames(mtcars)),
  size = 3.5
)
## Scatter plot coloured by cylinder count, one panel per vehicle class.
p <- ggplot(data = mpg) +
  geom_point(
    mapping = aes(x = displ, y = hwy, colour = factor(cyl))
  ) +
  facet_wrap(facets = ~class)

## Printed without any theme call, i.e. with the default theme_grey.
p
## Base scatter plot reused by the theme() examples below.
plot <- ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point()

## Blank out the panel background and the axis text.
plot +
  theme(
    panel.background = element_blank(),
    axis.text = element_blank()
  )

## Red axis text at 1.5 times its inherited size.
plot +
  theme(axis.text = element_text(colour = "red", size = rel(1.5)))

## Axis lines drawn with arrow heads.
plot +
  theme(axis.line = element_line(arrow = arrow()))

## White panel on a grey, black-bordered plot background with 2 cm margins.
plot +
  theme(
    panel.background = element_rect(fill = "white"),
    plot.margin = margin(t = 2, r = 2, b = 2, l = 2, unit = "cm"),
    plot.background = element_rect(
      fill = "grey90",
      colour = "black",
      size = 1
    )
  )
## all changes are relative to the default value
line
rect
text
title
aspect.ratio
axis.title
axis.title.x
axis.title.y
axis.text
axis.text.x
axis.text.y
axis.ticks
axis.ticks.x
axis.ticks.y
axis.ticks.length
axis.line
axis.line.x
axis.line.y
## for more options, see
?theme
theme_gray
## Scatter plot coloured by cylinder count, with the legend suppressed.
ggplot(data = mpg) +
  geom_point(
    mapping = aes(x = displ, y = hwy, colour = factor(cyl))
  ) +
  theme(legend.position = "none")
## Reusable text element: bold, black, size 20.
black.bold.text <- element_text(
  face = "bold",
  color = "black",
  size = 20
)

## theme_bw() first, then override its text element; the order matters
## because a later theme call replaces settings made by an earlier one.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy, colour = class)) +
  geom_point() +
  labs(title = "hwy vs displ") +
  theme_bw() +
  theme(text = black.bold.text)
## Reusable text element: bold, black, size 20.
black.bold.text <- element_text(
  face = "bold",
  color = "black",
  size = 20
)

## Same as the previous example, additionally removing the panel grid lines.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy, colour = class)) +
  geom_point() +
  labs(title = "hwy vs displ") +
  theme_bw() +
  theme(
    text = black.bold.text,
    panel.grid = element_blank()
  )
## Two reusable text elements applied to different parts of the plot.
black.bold.text <- element_text(
  face = "bold",
  color = "black",
  size = 20
)
red.italic.text <- element_text(
  face = "italic",
  color = "red",
  size = 15
)

## Bold black axis text, axis titles and legend entries; the legend title is
## red italic.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy, colour = class)) +
  geom_point() +
  labs(title = "hwy vs displ") +
  theme_bw() +
  theme(
    axis.text = black.bold.text,
    axis.title = black.bold.text,
    legend.title = red.italic.text,
    legend.text = black.bold.text
  )
## Empirical CDFs of two simulated groups of n draws each: both are centred
## at 0, with standard deviation 3 for group 1 and 10 for group 2.
n <- 100
## Seed the generator so the simulated data (and the figure) are reproducible;
## the same seed is used by the density example in this document.
set.seed(32611)
df <- data.frame(
  x = c(rnorm(n, 0, 3), rnorm(n, 0, 10)),
  g = gl(2, n)                  # group label: n ones followed by n twos
)
ggplot(df, aes(x, colour = g)) +
  stat_ecdf()
## Draw n standard-normal values with a fixed seed for reproducibility.
n <- 100
set.seed(32611)
df <- data.frame(x = rnorm(n))
x <- df$x

## Kernel density estimate of the sample ...
base <- ggplot(data = df, mapping = aes(x = x)) +
  geom_density()

## ... overlaid with the standard normal density in red, on the range [-3, 3].
base +
  stat_function(fun = dnorm, colour = "red") +
  xlim(-3, 3)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
Setting the limits on a scale converts all values outside the range to NA.
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 136 rows containing non-finite values (stat_smooth).
## Warning: Removed 136 rows containing missing values (geom_point).