Zhiguang Huo (Caleb)
Monday September 18, 2017
ggplot2 is based on the grammar of graphics, the idea that you can build every graph from the same few components: a data set, a set of geoms (visual marks that represent data points), and a coordinate system.
ggplot2 cheatsheet: https://www.rstudio.com/wp-content/uploads/2015/03/ggplot2-cheatsheet.pdf
## Warning: package 'ggplot2' was built under R version 3.3.2
## Classes 'tbl_df', 'tbl' and 'data.frame': 234 obs. of 11 variables:
## $ manufacturer: chr "audi" "audi" "audi" "audi" ...
## $ model : chr "a4" "a4" "a4" "a4" ...
## $ displ : num 1.8 1.8 2 2 2.8 2.8 3.1 1.8 1.8 2 ...
## $ year : int 1999 1999 2008 2008 1999 1999 2008 1999 1999 2008 ...
## $ cyl : int 4 4 4 4 6 6 6 4 4 4 ...
## $ trans : chr "auto(l5)" "manual(m5)" "manual(m6)" "auto(av)" ...
## $ drv : chr "f" "f" "f" "f" ...
## $ cty : int 18 21 20 21 16 18 18 18 16 20 ...
## $ hwy : int 29 29 31 30 26 26 27 26 25 28 ...
## $ fl : chr "p" "p" "p" "p" ...
## $ class : chr "compact" "compact" "compact" "compact" ...
## # A tibble: 6 × 11
## manufacturer model displ year cyl trans drv cty hwy fl
## <chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int> <chr>
## 1 audi a4 1.8 1999 4 auto(l5) f 18 29 p
## 2 audi a4 1.8 1999 4 manual(m5) f 21 29 p
## 3 audi a4 2.0 2008 4 manual(m6) f 20 31 p
## 4 audi a4 2.0 2008 4 auto(av) f 21 30 p
## 5 audi a4 2.8 1999 6 auto(l5) f 16 26 p
## 6 audi a4 2.8 1999 6 manual(m5) f 18 26 p
## # ... with 1 more variables: class <chr>
qplot(displ, hwy, data = mpg)
## aesthetics: displ (x), hwy (y); data: mpg
qplot(displ, hwy, colour = class, data = mpg)
## aesthetics: displ (x), hwy (y), class (colour); data: mpg
mpg_sub <- subset(mpg, class!="suv") ## exclude the "suv" class: the shape palette supports a maximum of 6 shapes
qplot(displ, hwy, shape = class, data = mpg_sub) ## aesthetics: displ (x), hwy (y), class (shape); data: mpg_sub
qplot(x = displ, y = hwy, data = mpg, geom = "line")
## aesthetics: displ (x), hwy (y)
## data: mpg
## geometries: line
qplot(x = displ, y = hwy, data = mpg, geom = "path")
## aesthetics: displ (x), hwy (y)
## data: mpg
## geometries: path
qplot(x = class, y = displ, data = mpg, geom = "boxplot")
## aesthetics: class (x), displ (y)
## data: mpg
## geometries: boxplot
qplot(x = class, y = displ, data = mpg, geom = "jitter")
## aesthetics: class (x), displ (y)
## data: mpg
## geometries: jitter
qplot(x = class, y = displ, data = mpg, geom = c("boxplot", "jitter"))
## aesthetics: class (x), displ (y)
## data: mpg
## geometries: boxplot, jitter
qplot(x = displ, data = mpg, geom = "histogram")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## aesthetics: displ (x)
## data: mpg
## geometries: histogram
qplot(x = displ, data = mpg, geom = "density")
## aesthetics: displ (x)
## data: mpg
## geometries: density
qplot(x = displ, data = mpg, geom = "density", facets = ~class)
## aesthetics: displ (x)
## data: mpg
## geometries: density
## facets: ~class
Compared to qplot(), ggplot() makes it easier to work with multiple datasets.
# Data, aesthetic mapping and geometry are added as separate components.
ggplot(data = mpg) +
  aes(x = displ, y = hwy, size = cyl) +
  geom_point()
# A partially specified plot can be stored and completed with a layer later.
myggplot <- ggplot(data = mpg) +
  aes(x = displ, y = hwy, color = cyl)
myggplot + geom_point()
# Plot-level aesthetics are shared by all layers; each layer adds its own.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy, colour = class)) +
  geom_point(mapping = aes(size = cyl)) +
  geom_line(mapping = aes(group = class))
# List every object in the attached ggplot2 package whose name starts with
# "geom_". The argument is spelled out as `envir`; the original `env` only
# worked through partial argument matching.
ls(pattern = "^geom_", envir = as.environment("package:ggplot2"))
## [1] "geom_abline" "geom_area" "geom_bar"
## [4] "geom_bin2d" "geom_blank" "geom_boxplot"
## [7] "geom_col" "geom_contour" "geom_count"
## [10] "geom_crossbar" "geom_curve" "geom_density"
## [13] "geom_density_2d" "geom_density2d" "geom_dotplot"
## [16] "geom_errorbar" "geom_errorbarh" "geom_freqpoly"
## [19] "geom_hex" "geom_histogram" "geom_hline"
## [22] "geom_jitter" "geom_label" "geom_line"
## [25] "geom_linerange" "geom_map" "geom_path"
## [28] "geom_point" "geom_pointrange" "geom_polygon"
## [31] "geom_qq" "geom_quantile" "geom_raster"
## [34] "geom_rect" "geom_ribbon" "geom_rug"
## [37] "geom_segment" "geom_smooth" "geom_spoke"
## [40] "geom_step" "geom_text" "geom_tile"
## [43] "geom_violin" "geom_vline"
# Points plus one linear fit per class, drawn without a confidence band.
# TRUE/FALSE are used instead of T/F, which are ordinary reassignable variables.
ggplot(data = mpg, aes(displ, hwy, colour = class)) +
  geom_point() +
  geom_smooth(aes(group = class), method = "lm", se = FALSE, size = 1)
# Linear fits only, with the confidence band shown and thicker lines.
ggplot(data = mpg, aes(displ, hwy, colour = class)) +
  geom_smooth(aes(group = class), method = "lm", se = TRUE, size = 2)
# Box plot of hwy per class, stored in mpgbox (not printed here).
mpgbox <- ggplot(data = mpg, mapping = aes(x = class, y = hwy)) +
  geom_boxplot(mapping = aes(fill = class))
# Histogram of hwy with a constant fill colour.
ggplot(data = mpg, mapping = aes(x = hwy)) +
  geom_histogram(binwidth = 3, fill = "steelblue")
# Histogram of hwy with the fill mapped to class.
ggplot(data = mpg, mapping = aes(x = hwy)) +
  geom_histogram(mapping = aes(fill = class), binwidth = 3)
# One histogram panel per class, stacked in rows.
ggplot(data = mpg, mapping = aes(x = hwy, fill = class)) +
  geom_histogram(binwidth = 3) +
  facet_grid(class ~ .)
# Per-class summary of displ: class label, mean and standard deviation.
mpgSummary <- data.frame(class = with(mpg, tapply(class, class, unique)),
                         meanDispl = with(mpg, tapply(displ, class, mean)),
                         sdDispl = with(mpg, tapply(displ, class, sd)))
# Bar chart of the class means with error bars of +/- one standard deviation.
ggplot(mpgSummary, aes(x=class, y=meanDispl, fill=class)) +
  geom_bar(position=position_dodge(), stat="identity",
           colour="black", # Use black outlines,
           size=.3) +      # Thinner lines
  geom_errorbar(aes(ymin=meanDispl-sdDispl, ymax=meanDispl+sdDispl),
                size=.3,    # Thinner lines
                # NOTE(review): the end of this call was missing in the source
                # (unbalanced parenthesis). width/position below follow the
                # usual error-bar recipe; confirm against the original .Rmd.
                width=.2,
                position=position_dodge(.9))
# Facets wrapped into a grid of panels, one per class.
ggplot(data = mpg, mapping = aes(x = hwy, fill = class)) +
  geom_histogram(binwidth = 3) +
  facet_wrap(~ class)
# A facet_null() added after facet_grid().
ggplot(data = mpg, mapping = aes(x = hwy, fill = class)) +
  geom_histogram(binwidth = 3) +
  facet_grid(class ~ .) +
  facet_null()
# Bar position adjustments for counts per class, split by cylinder count.
ggplot(data = mpg, mapping = aes(x = class, fill = as.factor(cyl))) +
  geom_bar(position = "dodge") # side by side
ggplot(data = mpg, mapping = aes(x = class, fill = as.factor(cyl))) +
  geom_bar(position = "fill") # fill
ggplot(data = mpg, mapping = aes(x = class, fill = as.factor(cyl))) +
  geom_bar(position = "stack") # default
# Empirical CDF of 1000 standard-normal draws, drawn as a step function.
df <- data.frame(x = rnorm(n = 1000))
ggplot(data = df, mapping = aes(x = x)) +
  stat_ecdf(geom = "step")
# Two groups of n draws each (sd 3 and sd 10), one ECDF per group.
n <- 100
df <- data.frame(
  x = c(rnorm(n, mean = 0, sd = 3), rnorm(n, mean = 0, sd = 10)),
  g = gl(n = 2, k = n)
)
ggplot(data = df, mapping = aes(x = x, colour = g)) +
  stat_ecdf()
# Kernel density estimate of n standard-normal draws, overlaid with the
# theoretical normal density in red.
n <- 100
df <- data.frame(
  x = rnorm(n)
) # closing parenthesis restored; it was missing in the source
x <- df$x
base <- ggplot(df, aes(x)) + geom_density()
base + stat_function(fun = dnorm, colour = "red") + xlim(c(-3,3))
# One ellipse around all observations.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  stat_ellipse()
# Colour mapped to a logical expression: one ellipse per group (displ > 4 or not).
ggplot(data = mpg, mapping = aes(x = displ, y = hwy, color = displ > 4)) +
  geom_point() +
  stat_ellipse()
# Scatter plot with a smoother, reused by the coordinate-limit examples below.
# NOTE(review): the source ended on a dangling `+`. The `geom_smooth()` message
# in the captured output indicates the lost layer and a print of `p`; confirm
# against the original .Rmd.
p <- ggplot(mpg, aes(displ, hwy)) +
  geom_point() +
  geom_smooth()
p
## `geom_smooth()` using method = 'loess'
# Restrict the visible x range to [3, 5] through the coordinate system.
p + coord_cartesian(xlim = c(3, 5), expand = FALSE)
## `geom_smooth()` using method = 'loess'
Setting the limits on a scale converts all values outside the range to NA.
p + xlim(3, 5)
## `geom_smooth()` using method = 'loess'
## Warning: Removed 136 rows containing non-finite values (stat_smooth).
## Warning: Removed 136 rows containing missing values (geom_point).
# the same as p + scale_x_continuous(limits = c(3, 5))
# Same scatter plot drawn with two different fixed aspect ratios.
p <- ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point()
p + coord_fixed(ratio = 0.5)
p + coord_fixed(ratio = 0.1)
# Box plot of hwy per class.
# NOTE(review): the source ended on a dangling `+`, which would swallow the next
# statement. coord_flip() is presumably the lost layer, since this section walks
# through coordinate systems; confirm against the original .Rmd.
ggplot(mpg, aes(class, hwy)) +
  geom_boxplot() +
  coord_flip()
# World outline data, drawn as paths with explicit axis breaks.
world <- map_data(map = "world")
## Warning: package 'maps' was built under R version 3.3.2
lat_breaks <- (-2:2) * 30
long_breaks <- (-4:4) * 45
worldmap <- ggplot(data = world, mapping = aes(x = long, y = lat, group = group)) +
  geom_path() +
  scale_y_continuous(breaks = lat_breaks) +
  scale_x_continuous(breaks = long_breaks)
# Orthographic projection with the given orientation.
worldmap + coord_map(projection = "ortho", orientation = c(41, -74, 0))
# A single stacked bar of counts by cylinder, turned into a pie chart by
# mapping y to the angle in polar coordinates.
pie <- ggplot(data = mpg, mapping = aes(x = factor(1), fill = factor(cyl))) +
  geom_bar(width = 1)
pie + coord_polar(theta = "y")
# Faceted scatter plot, coloured by cylinder count, shown with the
# black-and-white theme.
p <- ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy, colour = factor(cyl))) +
  facet_wrap(~class)
p + theme_bw()
# Faceted scatter plot with larger text and rotated x-axis labels.
# NOTE(review): the continuation of the first statement was missing in the
# source. It is restored from the identical statement in the theme_bw example;
# confirm against the original .Rmd.
p <- ggplot(mpg) + geom_point(aes(x = displ, y = hwy,
                                  colour=factor(cyl))) + facet_wrap(~class)
p + theme(text = element_text(size=20),
          axis.text.x = element_text(angle=90, hjust=1))
# Faceted scatter plot with a custom title and axis labels.
# NOTE(review): the continuation of the first statement was missing in the
# source. It is restored from the identical statement in the theme_bw example;
# confirm against the original .Rmd.
p <- ggplot(mpg) + geom_point(aes(x = displ, y = hwy,
                                  colour=factor(cyl))) + facet_wrap(~class)
p +
  labs(title = "New plot title", x = "New x label", y = "New y label")
## for more options, see ?labs
p + labs(title = "New plot title", x = "New x label", y = "New y label") +
  theme(axis.text.x = element_text(family = "Arial", colour = "red"))
## for more options, see ?theme
# Extract the R code from the R Markdown source into a script. The output name
# no longer carries a trailing space, which would have created a file literally
# named "Rgraph_ggplot2.R ".
knitr::purl("Rgraph_ggplot2.rmd", output = "Rgraph_ggplot2.R", documentation = 2)
## processing file: Rgraph_ggplot2.rmd
## output file: Rgraph_ggplot2.R
## [1] "Rgraph_ggplot2.R"