Zhiguang Huo (Caleb)
Monday September 18, 2017
ggplot2 is based on the grammar of graphics, the idea that you can build every graph from the same few components: a data set, a set of geoms (visual marks that represent data points), and a coordinate system.
ggplot2 cheatsheet: https://www.rstudio.com/wp-content/uploads/2015/03/ggplot2-cheatsheet.pdf
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.3.2
qplot()
## Show the structure of mpg: its class, dimensions, and each column's type.
str(mpg)
## Classes 'tbl_df', 'tbl' and 'data.frame': 234 obs. of 11 variables:
## $ manufacturer: chr "audi" "audi" "audi" "audi" ...
## $ model : chr "a4" "a4" "a4" "a4" ...
## $ displ : num 1.8 1.8 2 2 2.8 2.8 3.1 1.8 1.8 2 ...
## $ year : int 1999 1999 2008 2008 1999 1999 2008 1999 1999 2008 ...
## $ cyl : int 4 4 4 4 6 6 6 4 4 4 ...
## $ trans : chr "auto(l5)" "manual(m5)" "manual(m6)" "auto(av)" ...
## $ drv : chr "f" "f" "f" "f" ...
## $ cty : int 18 21 20 21 16 18 18 18 16 20 ...
## $ hwy : int 29 29 31 30 26 26 27 26 25 28 ...
## $ fl : chr "p" "p" "p" "p" ...
## $ class : chr "compact" "compact" "compact" "compact" ...
## Print the first rows of mpg.
head(mpg)
## # A tibble: 6 × 11
## manufacturer model displ year cyl trans drv cty hwy fl
## <chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int> <chr>
## 1 audi a4 1.8 1999 4 auto(l5) f 18 29 p
## 2 audi a4 1.8 1999 4 manual(m5) f 21 29 p
## 3 audi a4 2.0 2008 4 manual(m6) f 20 31 p
## 4 audi a4 2.0 2008 4 auto(av) f 21 30 p
## 5 audi a4 2.8 1999 6 auto(l5) f 16 26 p
## 6 audi a4 2.8 1999 6 manual(m5) f 18 26 p
## # ... with 1 more variables: class <chr>
## hwy plotted against displ.
## aesthetics: displ, hwy; data: mpg
qplot(x = displ, y = hwy, data = mpg)
## Same plot with colour mapped to class.
## aesthetics: displ, hwy, class; data: mpg
qplot(x = displ, y = hwy, data = mpg, colour = class)
## qplot supports a maximum of 6 shapes, so drop the "suv" rows before
## mapping class to shape.
mpg_sub <- subset(mpg, subset = class != "suv")
## aesthetics: displ, hwy, class (as shape); data: mpg_sub
qplot(x = displ, y = hwy, data = mpg_sub, shape = class)
## hwy against displ drawn with the line geom.
## aesthetics: displ, hwy
## data: mpg
## geometries: line
qplot(x = displ, y = hwy, data = mpg, geom = "line")
## The same mapping drawn with the path geom.
## aesthetics: displ, hwy
## data: mpg
## geometries: path
qplot(x = displ, y = hwy, data = mpg, geom = "path")
## displ by class as boxplots.
## aesthetics: class, displ
## data: mpg
## geometries: boxplot
qplot(x = class, y = displ, data = mpg, geom = "boxplot")
## displ by class as jittered points.
## aesthetics: class, displ
## data: mpg
## geometries: jitter
qplot(x = class, y = displ, data = mpg, geom = "jitter")
## Both geoms requested in a single call.
## aesthetics: class, displ
## data: mpg
## geometries: boxplot, jitter
qplot(x = class, y = displ, data = mpg, geom = c("boxplot", "jitter"))
## Histogram of displ.
## aesthetics: displ
## data: mpg
## geometries: histogram
qplot(x = displ, data = mpg, geom = "histogram")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Density of displ.
## aesthetics: displ
## data: mpg
## geometries: density
qplot(x = displ, data = mpg, geom = "density")
## Density of displ, facetted by class.
## aesthetics: displ
## data: mpg
## geometries: density
## facets: ~class
qplot(x = displ, data = mpg, geom = "density", facets = ~ class)
Compared to qplot(), it’s easier to use multiple datasets in ggplot().
## Points with size mapped to cyl; the mapping is added with `+`.
ggplot(mpg) +
  aes(x = displ, y = hwy, size = cyl) +
  geom_point()

## Store data plus mapping first, then add the geom when drawing.
myggplot <- ggplot(mpg) +
  aes(x = displ, y = hwy, color = cyl)
myggplot + geom_point()

## Plot-level colour mapping, with extra per-geom mappings:
## point size from cyl, and lines grouped by class.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy, colour = class)) +
  geom_point(mapping = aes(size = cyl)) +
  geom_line(mapping = aes(group = class))
## List the names starting with "geom_" in the attached ggplot2 package
## environment. `envir` is written out in full instead of relying on partial
## argument matching of `env`.
ls(pattern = "^geom_", envir = as.environment("package:ggplot2"))
## [1] "geom_abline" "geom_area" "geom_bar"
## [4] "geom_bin2d" "geom_blank" "geom_boxplot"
## [7] "geom_col" "geom_contour" "geom_count"
## [10] "geom_crossbar" "geom_curve" "geom_density"
## [13] "geom_density_2d" "geom_density2d" "geom_dotplot"
## [16] "geom_errorbar" "geom_errorbarh" "geom_freqpoly"
## [19] "geom_hex" "geom_histogram" "geom_hline"
## [22] "geom_jitter" "geom_label" "geom_line"
## [25] "geom_linerange" "geom_map" "geom_path"
## [28] "geom_point" "geom_pointrange" "geom_polygon"
## [31] "geom_qq" "geom_quantile" "geom_raster"
## [34] "geom_rect" "geom_ribbon" "geom_rug"
## [37] "geom_segment" "geom_smooth" "geom_spoke"
## [40] "geom_step" "geom_text" "geom_tile"
## [43] "geom_violin" "geom_vline"
## Points coloured by class plus one linear fit per class, drawn without
## the standard-error band. TRUE/FALSE are used instead of T/F because T and F
## are ordinary variables that can be reassigned.
ggplot(data = mpg, aes(displ, hwy, colour = class)) +
  geom_point() +
  geom_smooth(aes(group = class), method = "lm", se = FALSE, size = 1)
## Per-class linear fits only, with the standard-error band and thicker lines.
ggplot(data = mpg, aes(displ, hwy, colour = class)) +
  geom_smooth(aes(group = class), method = "lm", se = TRUE, size = 2)
## Boxplot of hwy per class, filled by class; stored, then printed.
mpgbox <- ggplot(mpg, mapping = aes(x = class, y = hwy)) +
  geom_boxplot(mapping = aes(fill = class))
mpgbox

## Histogram of hwy with a single fixed fill colour.
ggplot(mpg, mapping = aes(x = hwy)) +
  geom_histogram(fill = "steelblue", binwidth = 3)

## Histogram of hwy with the fill mapped to class.
ggplot(mpg, mapping = aes(x = hwy)) +
  geom_histogram(mapping = aes(fill = class), binwidth = 3)

## Same fill mapping set at plot level, one facet row per class.
ggplot(mpg, mapping = aes(x = hwy, fill = class)) +
  geom_histogram(binwidth = 3) +
  facet_grid(class ~ .)
## Per-class summary of displ: the class label, the mean, and the standard
## deviation. All three columns are computed with tapply() over mpg$class.
mpgSummary <- with(
  mpg,
  data.frame(
    class = tapply(class, class, unique),
    meanDispl = tapply(displ, class, mean),
    sdDispl = tapply(displ, class, sd)
  )
)
## Bar chart of mean displ per class with error bars of +/- one sd.
ggplot(mpgSummary, mapping = aes(x = class, y = meanDispl, fill = class)) +
  geom_bar(
    stat = "identity",
    position = position_dodge(),
    colour = "black", # black outlines
    size = .3 # thinner lines
  ) +
  geom_errorbar(
    mapping = aes(ymin = meanDispl - sdDispl, ymax = meanDispl + sdDispl),
    position = position_dodge(.9),
    width = .2,
    size = .3 # thinner lines
  )
## Histogram of hwy, wrapped into one panel per class.
ggplot(mpg, mapping = aes(x = hwy, fill = class)) +
  geom_histogram(binwidth = 3) +
  facet_wrap(~ class)

## facet_null() is added after facet_grid(), so it is the facet
## specification in effect for this plot.
ggplot(mpg, mapping = aes(x = hwy, fill = class)) +
  geom_histogram(binwidth = 3) +
  facet_grid(class ~ .) +
  facet_null()

## Counts per class, filled by cyl, under three position adjustments.
ggplot(mpg, mapping = aes(x = class, fill = as.factor(cyl))) +
  geom_bar(position = "dodge") # side by side
ggplot(mpg, mapping = aes(x = class, fill = as.factor(cyl))) +
  geom_bar(position = "fill") # fill
ggplot(mpg, mapping = aes(x = class, fill = as.factor(cyl))) +
  geom_bar(position = "stack") # default
## Empirical CDF of 1000 standard-normal draws, drawn as a step function.
df <- data.frame(x = rnorm(n = 1000))
ggplot(df, mapping = aes(x = x)) +
  stat_ecdf(geom = "step")

## Two groups of n draws each (sd 3 and sd 10), one ECDF per group.
n <- 100
df <- data.frame(
  x = c(rnorm(n, mean = 0, sd = 3), rnorm(n, mean = 0, sd = 10)),
  g = gl(n = 2, k = n)
)
ggplot(df, mapping = aes(x = x, colour = g)) +
  stat_ecdf()
## Density estimate of n seeded normal draws, with the dnorm curve
## overlaid in red and the x axis limited to [-3, 3].
n <- 100
set.seed(32611)
df <- data.frame(x = rnorm(n))
x <- df[["x"]]
base <- ggplot(df, mapping = aes(x = x)) +
  geom_density()
base +
  stat_function(fun = dnorm, colour = "red") +
  xlim(c(-3, 3))
## Scatter plot of hwy against displ with an ellipse over all points.
ggplot(mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  stat_ellipse()

## Colour mapped to whether displ exceeds 4, with stat_ellipse() added.
ggplot(mpg, mapping = aes(x = displ, y = hwy, color = displ > 4)) +
  geom_point() +
  stat_ellipse()
## Scatter plot with a smoother, stored as p and printed.
p <- ggplot(mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  geom_smooth()
p
## `geom_smooth()` using method = 'loess'
## Limit the x range to [3, 5] through the coordinate system, with no
## expansion of the limits.
p + coord_cartesian(xlim = c(3, 5), expand = FALSE)
## `geom_smooth()` using method = 'loess'
Setting the limits on a scale converts all values outside the range to NA.
## Limit the x scale to [3, 5]; the warnings below report the rows removed.
p + xlim(3, 5)
## `geom_smooth()` using method = 'loess'
## Warning: Removed 136 rows containing non-finite values (stat_smooth).
## Warning: Removed 136 rows containing missing values (geom_point).
# the same as p + scale_x_continuous(limits = c(3, 5))
## Scatter plot drawn under two fixed aspect ratios.
p <- ggplot(mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point()
p + coord_fixed(ratio = 0.5)
p + coord_fixed(ratio = 0.1)

## Boxplot of hwy per class with the coordinates flipped.
ggplot(mpg, mapping = aes(x = class, y = hwy)) +
  geom_boxplot() +
  coord_flip()
## Outline data for the "world" map.
world <- map_data(map = "world")
## Warning: package 'maps' was built under R version 3.3.2
## Draw the outlines as paths, with latitude breaks every 30 and longitude
## breaks every 45.
worldmap <- ggplot(world, mapping = aes(x = long, y = lat, group = group)) +
  geom_path() +
  scale_y_continuous(breaks = seq(-60, 60, by = 30)) +
  scale_x_continuous(breaks = seq(-180, 180, by = 45))
worldmap
## Same map under an "ortho" projection with orientation c(41, -74, 0).
worldmap + coord_map(projection = "ortho", orientation = c(41, -74, 0))
## One full-width stacked bar of counts filled by cyl, then drawn in polar
## coordinates with the angle taken from y.
pie <- ggplot(mpg, mapping = aes(x = factor(1), fill = factor(cyl))) +
  geom_bar(width = 1)
pie + coord_polar(theta = "y")
## Scatter plot coloured by cyl and wrapped by class, printed with the
## default theme and then with theme_bw().
p <- ggplot(mpg) +
  geom_point(mapping = aes(x = displ, y = hwy, colour = factor(cyl))) +
  facet_wrap(~ class)
p
p + theme_bw()
## Scatter plot coloured by cyl, with all text at size 20 and the x axis
## labels rotated 90 degrees.
p <- ggplot(mpg) +
  geom_point(mapping = aes(x = displ, y = hwy, colour = factor(cyl)))
p +
  theme(
    text = element_text(size = 20),
    axis.text.x = element_text(angle = 90, hjust = 1)
  )
## Scatter plot coloured by cyl, given a new title and axis labels.
p <- ggplot(mpg) +
  geom_point(mapping = aes(x = displ, y = hwy, colour = factor(cyl)))
p + labs(title = "New plot title", x = "New x label", y = "New y label")
## for more, see ?labs
## Same labels, with the x axis text in red Arial.
p +
  labs(title = "New plot title", x = "New x label", y = "New y label") +
  theme(axis.text.x = element_text(family = "Arial", colour = "red"))
## for more, see ?theme
## Extract the R code from the R Markdown source into a script file.
## The output file name is given without a trailing space, so the script is
## written to a file that actually ends in ".R".
knitr::purl("Rgraph_ggplot2.rmd", output = "Rgraph_ggplot2.R", documentation = 2)
##
##
## processing file: Rgraph_ggplot2.rmd
##
|
| | 0%
|
|. | 1%
|
|. | 2%
|
|.. | 3%
|
|... | 4%
|
|... | 5%
|
|.... | 6%
|
|..... | 7%
|
|..... | 8%
|
|...... | 9%
|
|....... | 10%
|
|....... | 11%
|
|........ | 12%
|
|......... | 14%
|
|......... | 15%
|
|.......... | 16%
|
|........... | 17%
|
|............ | 18%
|
|............ | 19%
|
|............. | 20%
|
|.............. | 21%
|
|.............. | 22%
|
|............... | 23%
|
|................ | 24%
|
|................ | 25%
|
|................. | 26%
|
|.................. | 27%
|
|.................. | 28%
|
|................... | 29%
|
|.................... | 30%
|
|.................... | 31%
|
|..................... | 32%
|
|...................... | 33%
|
|...................... | 34%
|
|....................... | 35%
|
|........................ | 36%
|
|........................ | 38%
|
|......................... | 39%
|
|.......................... | 40%
|
|.......................... | 41%
|
|........................... | 42%
|
|............................ | 43%
|
|............................ | 44%
|
|............................. | 45%
|
|.............................. | 46%
|
|.............................. | 47%
|
|............................... | 48%
|
|................................ | 49%
|
|................................ | 50%
|
|................................. | 51%
|
|.................................. | 52%
|
|................................... | 53%
|
|................................... | 54%
|
|.................................... | 55%
|
|..................................... | 56%
|
|..................................... | 57%
|
|...................................... | 58%
|
|....................................... | 59%
|
|....................................... | 60%
|
|........................................ | 61%
|
|......................................... | 62%
|
|......................................... | 64%
|
|.......................................... | 65%
|
|........................................... | 66%
|
|........................................... | 67%
|
|............................................ | 68%
|
|............................................. | 69%
|
|............................................. | 70%
|
|.............................................. | 71%
|
|............................................... | 72%
|
|............................................... | 73%
|
|................................................ | 74%
|
|................................................. | 75%
|
|................................................. | 76%
|
|.................................................. | 77%
|
|................................................... | 78%
|
|................................................... | 79%
|
|.................................................... | 80%
|
|..................................................... | 81%
|
|..................................................... | 82%
|
|...................................................... | 83%
|
|....................................................... | 84%
|
|........................................................ | 85%
|
|........................................................ | 86%
|
|......................................................... | 88%
|
|.......................................................... | 89%
|
|.......................................................... | 90%
|
|........................................................... | 91%
|
|............................................................ | 92%
|
|............................................................ | 93%
|
|............................................................. | 94%
|
|.............................................................. | 95%
|
|.............................................................. | 96%
|
|............................................................... | 97%
|
|................................................................ | 98%
|
|................................................................ | 99%
|
|.................................................................| 100%
## output file: Rgraph_ggplot2.R
## [1] "Rgraph_ggplot2.R "