Programming basics for Biostatistics 6099

R graphics

Zhiguang Huo (Caleb)

Tuesday September 19, 2023

Plotting in R

scatter plot

n <- 80
set.seed(32611)
x <- sort(runif(n, min = 0, max=2*pi))
y <- sin(x) + rnorm(n, mean = 0, sd = 0.2)
plot(x,y)

Type

plot(x,y, type="p") ## default is "p"

plot(x,y, type="l") ## # default xlab and ylab are the variable

plot(x,y, type="b")

plot(x,y, type="o")

plot(x,y, type="n")

Labels

plot(x,y, main="sin function with Gaussian noise", 
     xlab="x axis", ylab="y axis") 

Text and Symbol Size

plot(x,y, main="sin function with Gaussian noise", 
     xlab="x axis", ylab="y axis", 
     cex=2, cex.axis=2, cex.lab=2, cex.main=2, cex.sub=2) 

Text

letters[1:10] ## letters contain a-z
##  [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j"
plot(x=1:10, y=1:10, type="n", main = "some texts") 
text(x = 1:10, y=1:10, labels = letters[1:10])

Text (2)

letters[1:10] ## letters contain a-z
##  [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j"
plot(x=1:10, y=1:10, main = "some texts") 
text(x = 1:10, y=1:10, labels = letters[1:10], pos = NULL)

Math expression

plot(1:10, type="n", xlab="X", ylab="Y")
text(5.5, 9, "expression(y==alpha[1]*x+alpha[2]*x^2)", cex=1.5)
text(5.5, 8, expression(y==alpha[1]*x+alpha[2]*x^2), cex=1.5)
theta = 3
text(5.5, 6, "bquote(hat(theta)==.(theta))", cex=1.5)
text(5.5, 5, bquote(hat(theta)==.(theta)), cex=1.5)

Point type

plot(1:25,1:25,pch=1:25)

plot(x,y, pch=19) 

Multiple plots

n <- 80
set.seed(32611)
x <- sort(runif(n, min = 0, max=2*pi))
y1 <- sin(x) + rnorm(n, mean = 0, sd = 0.2)
y2 <- cos(x) + rnorm(n, mean = 0, sd = 0.2)

par(mfrow=c(1,2))
plot(x,y1, main="sin plot")
plot(x,y2, main="cos plot")

Lines

par(mfrow=c(2,2))
plot(x,y, type="l",lwd=1) 
plot(x,y, type="l",lwd=2) 
plot(x,y, type="l",lwd=3) 
plot(x,y, type="l",lwd=4) 

abline

plot(0:6, 0:6, type = "n")
abline(a = 3, b = -1)

abline

plot(0:6, 0:6, type = "n")
abline(h = 3)
abline(v = 3, col=2)

abline

z <- lm(dist ~ speed, data = cars)
plot(cars)
abline(z) # equivalent to abline(reg = z) or

# abline(coef = coef(z))

Color

plot(1:8,1:8, col=1:8) 

par(mfrow=c(2,2))
plot(x,y, main = "black color") ## default is black
plot(x,y, main = "blue color", col=4) ## basic color option 1:8
plot(x,y, main = "green color", col='green') ## find colors from colors()
plot(x,y, main = "red color", col='#FF0000') ## RGB mode

More on colors

n <- 10
par(mfrow=c(2,3))
plot(1:n,1:n, col=rainbow(n), main = "rainbow") 
plot(1:n,1:n, col=heat.colors(n), main = "heat.colors") 
plot(1:n,1:n, col=terrain.colors(n), main = "terrain.colors") 
plot(1:n,1:n, col=topo.colors(n), main = "topo.colors") 
plot(1:n,1:n, col=cm.colors(n), main = "cm.colors") 

Curve

curve(sin, from = 0, to = 2*pi)

chippy <- function(x) sin(cos(x)*exp(-x/2))
curve(chippy, -8, 7, n = 2001) # n specify number of points used to draw the curve, default n = 101

Legend

curve(sin, 0, 2*pi, col=2)
curve(cos, 0, 2*pi, col=4, add=T) ## add=T to overlay a new curve on top of the original figure
legend("bottomleft", legend = c("sin", "cos"), col = c(2,4),lty = 1)

segments

plot(1:3,1:3,col = "blue")
segments(x0 = 3,y0 = 3,x1 = 1,y1 = 1,lwd=2,col=2)

arrows

plot(1:3,1:3,col = "blue")
arrows(x0 = 3,y0 = 3,x1 = 1,y1 = 1,code=1,angle=30,length=.1,lwd=2)

polygon

plot(1:3,1:3,col = "blue")
polygon(x = c(2,3,3,2),y = c(2,2,3,3),col="pink")

A comprehensive example

set.seed(32611)
curve(dnorm,-4,4, ylim = c(-0.1, 0.5)) 

# shaded area
xvals <- seq(-4,2,length=100) 
dvals <- dnorm(xvals)
polygon(c(xvals,rev(xvals)),c(rep(0,100),rev(dvals)),col="gray")

#label pnorm as area under the curve 
arrows(1,.15,2,.25,code=1,angle=30,length=.1,lwd=2)
text(2,.25,paste('pnorm(2) =',round(pnorm(2),3)),cex=.75,pos=3)

#label dnorm as height
segments(2,0,2,dnorm(2),lwd=2,col=2)
arrows(2,.025,2.5,.1, code=1, angle=30, length=.1, lwd=2, col=2)
text(2.5,.1,paste('dnorm(2) =', round(dnorm(2),3)), cex=.75, pos=3, col=2)

#label qnorm as quantile
points(2,0,col=4,pch=16,cex=1.1)
arrows(2,0,3,.05,code=1,angle=30,length=.1,lwd=2,col=4)
text(3,.05,paste('qnorm(',round(pnorm(2),3),') = 2'), cex=.75,pos=3,col=4)

mtext(side=3,line=.5,'X ~ Normal(0,1)',cex=.9,font=2)
points(rnorm(20),jitter(rep(0,20)),pch=18,cex=.9)
legend(-4,.3,'rnorm(20)',pch=18,cex=.8,bty='n')

margin

par(mar=c(1,2,3,4)) # bottem, left, top, right order, inner margin
plot(x, y, main="Red sin", pch=20, col="red")

par(oma = c(4,3,2,1), mar=c(1,2,3,4)) # bottem, left, top, right order, inner margin and outer margin
plot(x, y, main="Red sin", pch=20, col="red")

mtext

plot(1:10, type="n", xlab="X", ylab="Y")
mtext(text = "mtext", side = 4, line = 1, col="red")

save plot

n <- 80
set.seed(32611)
x <- sort(runif(n, min = 0, max=2*pi))
y <- sin(x) + rnorm(n, mean = 0, sd = 0.2)

pdf("sinFunction.pdf")
plot(x,y)
dev.off()
## quartz_off_screen 
##                 2

IRIS data example

data(iris)
head(iris)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
str(iris)
## 'data.frame':    150 obs. of  5 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...

boxplot

boxplot(Petal.Length ~ Species, data = iris)

names(iris)
## [1] "Sepal.Length" "Sepal.Width"  "Petal.Length" "Petal.Width"  "Species"
par(mfrow = c(2,2))
for(i in 1:4){
  aname <- names(iris)[i]
  boxplot(iris[[i]] ~ iris$Species, xlab="Species", col=i, ylab=aname, main=aname)  
}

histogram

par(mfrow=c(2,2))
hist(iris$Petal.Length) ## default
hist(iris$Petal.Length, nclass=50) ## set number of bins
hist(iris$Petal.Length,  prob=TRUE, col="grey") ## grey color
lines(density(iris$Petal.Length), col="blue") ## and put the density function on top of the histogram

par(mfrow = c(2,2))
xlims <- range(iris$Petal.Length) ## set common x limits
uniqueSpecies <- levels(iris$Species)
for(i in seq_along(uniqueSpecies)){
  aspecies <- uniqueSpecies[i]
  sampleSelection <- iris$Species==aspecies
  adata <- iris$Petal.Length[sampleSelection]
  hist(adata, col=i, xlim=xlims, xlab="petal length",main=aspecies)
}

contour plot

x <- y <- seq(-1, 1, len=25)
z <- outer(x, y, FUN=function(x,y) -x*y*exp(-x^2-y^2))
# Contour plots
contour(x,y,z, main="Contour Plot")

filled.contour(x,y,z, main="Filled Contour Plot")

filled.contour(x,y,z, color.palette = heat.colors)

filled.contour(x,y,z, color.palette = colorRampPalette(c("red", "white", "blue")))

Heatmap.2

library(gplots) ## heatmap.2 is available in gplots
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
?heatmap.2

Application on Iris data

dataMatrix <- as.matrix(iris[,1:4])
heatmap.2(dataMatrix)

dataMatrix <- as.matrix(iris[,1:4])
heatmap.2(dataMatrix, trace = "none")

dataMatrix2 <- as.matrix(iris[1:5,1:4])
heatmap.2(dataMatrix2, trace = "none", 
          cellnote = dataMatrix2, notecol = "black")

dataMatrix <- as.matrix(iris[,1:4])
species <- iris$Species
color0 <- species
levels(color0) <- palette()[1:length(levels(species))]
color <- as.character(color0)

heatmap.2(dataMatrix, trace = "none", RowSideColors = color)
legend("topright", legend=levels(species),fill=levels(color0))

dataMatrix <- as.matrix(iris[,1:4])
species <- iris$Species
color0 <- species
levels(color0) <- palette()[1:length(levels(species))]
color <- as.character(color0)

heatmap.2(dataMatrix, trace = "none", RowSideColors = color,
           Colv=NA, Rowv = NA )
## Warning in heatmap.2(dataMatrix, trace = "none", RowSideColors = color, :
## Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting row dendogram.
## Warning in heatmap.2(dataMatrix, trace = "none", RowSideColors = color, :
## Discrepancy: Colv is FALSE, while dendrogram is `column'. Omitting column
## dendogram.
legend("topright", legend=levels(species),fill=levels(color0))

dataMatrix <- as.matrix(iris[,1:4])
species <- iris$Species
color0 <- species
levels(color0) <- palette()[1:length(levels(species))]
color <- as.character(color0)

heatmap.2(dataMatrix, trace = "none", RowSideColors = color,
           Colv=NA, Rowv = NA , scale = "row")
## Warning in heatmap.2(dataMatrix, trace = "none", RowSideColors = color, :
## Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting row dendogram.
## Warning in heatmap.2(dataMatrix, trace = "none", RowSideColors = color, :
## Discrepancy: Colv is FALSE, while dendrogram is `column'. Omitting column
## dendogram.
legend("topright", legend=levels(species),fill=levels(color0))

dataMatrix <- as.matrix(iris[,1:4])
species <- iris$Species
color0 <- species
levels(color0) <- palette()[1:length(levels(species))]
color <- as.character(color0)

heatmap.2(dataMatrix, trace = "none", 
          RowSideColors = color, col = bluered, 
           Colv=NA, Rowv = NA , scale = "row")
## Warning in heatmap.2(dataMatrix, trace = "none", RowSideColors = color, :
## Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting row dendogram.
## Warning in heatmap.2(dataMatrix, trace = "none", RowSideColors = color, :
## Discrepancy: Colv is FALSE, while dendrogram is `column'. Omitting column
## dendogram.
legend("topright", legend=levels(species),fill=levels(color0))

Adjust legend positions

dataMatrix <- as.matrix(iris[,1:4])
species <- iris$Species
color0 <- species
levels(color0) <- palette()[1:length(levels(species))]
color <- as.character(color0)

heatmap.2(dataMatrix, trace = "none", 
          RowSideColors = color, col = bluered, 
           Colv=NA, Rowv = NA , scale = "row", 
          margins = c(6,6))
## Warning in heatmap.2(dataMatrix, trace = "none", RowSideColors = color, :
## Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting row dendogram.
## Warning in heatmap.2(dataMatrix, trace = "none", RowSideColors = color, :
## Discrepancy: Colv is FALSE, while dendrogram is `column'. Omitting column
## dendogram.
par(xpd=TRUE) ## default is FALSE, allow legend outside the plotting area
legend(x = 0.7, y = 1.1, legend=levels(species),fill=levels(color0))

Advanced heatmap