Introduction to Biostatistical Computing PHC 6937

Matplotlib

Zhiguang Huo (Caleb)

Monday Nov 28th, 2022

Outline

Get started

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

time series plot

A toy example

# Seeded generator, so the toy series is the same on every run.
rng = np.random.default_rng(32611)
# 30 draws from the standard normal distribution.
data = rng.standard_normal(30)
# Running total of the draws; this is the series plotted throughout the slides.
data_cumsum = data.cumsum()
# With a single argument, plot() uses the values as y and their positions as x.
plt.plot(data_cumsum)

Figure and axes

use fig.add_subplot() to create Axes

Single figure

# Object-oriented route: create the Figure first, then attach an Axes to it.
# add_subplot(1,1,1) means 1 row, 1 column, first (and only) position.
fig = plt.figure()  # an empty figure with no Axes
ax = fig.add_subplot(1,1,1) # add one Axes
# Draw on that specific Axes rather than through the pyplot state machine.
ax.plot(data_cumsum)

Multiple subplots

# figsize is (width, height) in inches.
fig = plt.figure(figsize=(6, 3))
# add_subplot(nrows, ncols, index): the index counts from 1, left to right.
ax1 = fig.add_subplot(1,2,1)
ax2 = fig.add_subplot(1,2,2)
# Nothing is plotted on ax1/ax2 here, so both panels are shown empty.
plt.show()

use plt.subplots() to create Axes

Single figure

# plt.subplots() creates the Figure and its Axes in a single call.
fig, ax = plt.subplots(figsize=(3, 3))  # a figure with one Axes
# Bare expression: it only echoes the object in an interactive session.
ax
ax.plot(data_cumsum)

Multiple subplots

fig, axs = plt.subplots(2, 2)  # a figure with a 2x2 grid of Axes
# Bare expression; its echoed value is the array shown in the output below.
axs
## array([[<AxesSubplot: >, <AxesSubplot: >],
##        [<AxesSubplot: >, <AxesSubplot: >]], dtype=object)
# axs is indexed [row, column]; only the top-left panel receives a line.
axs[0,0].plot(data_cumsum)

use plt.subplot() to create Axes

# pyplot state-machine route: plt.subplot() makes a panel current and the
# following plt.* calls draw into it.
plt.figure(figsize=(6, 3))
plt.subplot(1,2,1)
plt.plot(data_cumsum)
# 122 is the three-digit shorthand for (1, 2, 2); this panel is left empty.
plt.subplot(122)
plt.show()

colors

# "g" is the single-letter shorthand for green.
plt.plot(data_cumsum, color = "g")

colors

# One row of four panels, each drawing the same series in a different colour.
fig = plt.figure(figsize=(8, 2))
ax1 = fig.add_subplot(1,4,1)
ax2 = fig.add_subplot(1,4,2)
ax3 = fig.add_subplot(1,4,3)
ax4 = fig.add_subplot(1,4,4)

# Single-letter colour codes: b = blue, g = green, r = red, k = black.
ax1.plot(data_cumsum, color = "b")
ax2.plot(data_cumsum, color = "g")
ax3.plot(data_cumsum, color = "r")
ax4.plot(data_cumsum, color = "k")

linestyle

# 2x2 grid comparing the four basic line styles on the same series.
plt.figure(figsize=(6, 6))
plt.subplot(221)
# "-" : solid
plt.plot(data_cumsum, linestyle = "-")
plt.subplot(222)
# "--" : dashed
plt.plot(data_cumsum, linestyle = "--")
plt.subplot(223)
# "-." : dash-dot
plt.plot(data_cumsum, linestyle = "-.")
plt.subplot(224)
# ":" : dotted
plt.plot(data_cumsum, linestyle = ":")
plt.show()

markers

markers

# 2x3 grid; sharex/sharey make every panel use the same axis limits.
fig, axes = plt.subplots(2,3,sharex=True, sharey=True)
# Marker codes: o = circle, v = triangle down, ^ = triangle up,
# D = diamond, X = filled x, s = square.
axes[0,0].plot(data_cumsum, marker = "o")
axes[0,1].plot(data_cumsum, marker = "v")
axes[0,2].plot(data_cumsum, marker = "^")
axes[1,0].plot(data_cumsum, marker = "D")
axes[1,1].plot(data_cumsum, marker = "X")
axes[1,2].plot(data_cumsum, marker = "s")
plt.show()

combinations of color, markers, and linestyle

# Both panels request the same styling: green, circle markers, dashed line.
plt.figure(figsize=(6, 3))
plt.subplot(121)
# Left: the style spelled out with keyword arguments.
plt.plot(data_cumsum, linestyle = "--", color="g", marker="o")
plt.subplot(122)
# Right: the same style packed into a single format string.
plt.plot(data_cumsum,"go--") ## color, marker, linestyle
plt.show()

ticks and labels

The same approach works for the y axis (set_yticks, set_yticklabels, etc.)

fig = plt.figure(figsize=(4, 4))
ax = fig.add_subplot(1,1,1)
ax.plot(data_cumsum)
# Fix the tick positions (in data coordinates) on the x axis.
ticks = ax.set_xticks([0,10,20,30])
# Replace the text shown at those positions, one label per tick, and style it.
labels = ax.set_xticklabels(["A", "B", "C", "D"], rotation=45, fontsize = "small")
# Axis label and panel title.
ax.set_xlabel("Letters")
ax.set_title("My python plot")

legend

fig = plt.figure(figsize=(4, 4))
ax = fig.add_subplot(1,1,1)
# Three independent series drawn on the same Axes; each call draws 30 fresh
# values from the existing generator `rng`.
# The label= text is what the legend displays for each line.
# Format strings: "r--" red dashed, "bD-" blue diamonds + solid,
# "go-." green circles + dash-dot.
ax.plot(rng.standard_normal(30).cumsum(), "r--", label="red")
ax.plot(rng.standard_normal(30).cumsum(), "bD-", label="blue")
ax.plot(rng.standard_normal(30).cumsum(), "go-.", label="green")
# loc="best" lets matplotlib choose where to place the legend.
ax.legend(loc="best")
plt.show()

Summary of plt attributes

Scatter plot

# Unseeded generator: the points differ on every run.
rng = np.random.default_rng()
data1 = rng.standard_normal(30)
data2 = rng.standard_normal(30)
# scatter(x, y) with two arrays of equal length.
plt.scatter(data1, data2)

Scatter plot

fig = plt.figure(figsize=(4, 4))
ax = fig.add_subplot(1,1,1)

# Unseeded generator: the points differ on every run.
rng = np.random.default_rng()
data1 = rng.standard_normal(30)
data2 = rng.standard_normal(30)
df = pd.DataFrame({"A": data1, "B": data2})
# With data=df, the strings "A" and "B" are looked up as columns of df.
ax.scatter("A", "B", data=df)
plt.show()

Scatter plot

# Unseeded generator: the points differ on every run.
rng = np.random.default_rng()
data1 = rng.standard_normal(30)
data2 = rng.standard_normal(30)
df = pd.DataFrame({"A": data1, "B": data2})
# pandas plotting accessor: column "A" on x, column "B" on y.
df.plot.scatter("A", "B")

# subplots(1,1) returns a single Axes object here, not an array.
fig, axes = plt.subplots(1,1,figsize=(3, 3))
# Unseeded generator: the points differ on every run.
rng = np.random.default_rng()
data1 = rng.standard_normal(30)
data2 = rng.standard_normal(30)
df = pd.DataFrame({"A": data1, "B": data2})
# ax= makes pandas draw on the existing Axes.
df.plot.scatter("A", "B", ax=axes)

line plot

# Unevenly spaced x positions.
a = np.array([1,2,3,6,8,9])
# Six uniform draws from a seeded generator, so the line is reproducible.
b = np.random.default_rng(32608).random(6)
# plot(x, y): the points are joined in the order given.
plt.plot(a,b)

line plot

a = np.array([1,2,3,6,8,9])
b = np.random.default_rng(32608).random(6)
df = pd.DataFrame({"A":a, "B":b})
# With data=df, "A" and "B" are looked up as columns of df.
plt.plot("A","B", data=df)

line plot

a = np.array([1,2,3,6,8,9])
b = np.random.default_rng(32608).random(6)
df = pd.DataFrame({"A":a, "B":b})
# pandas plotting accessor: column "A" on x, column "B" on y.
df.plot.line("A","B")

# subplots(1,1) returns a single Axes object here, not an array.
fig, axes = plt.subplots(1,1,figsize=(3, 3))
a = np.array([1,2,3,6,8,9])
b = np.random.default_rng(32608).random(6)
df = pd.DataFrame({"A":a, "B":b})
# ax= makes pandas draw on the existing Axes.
df.plot.line("A","B", ax=axes)

Categorical variable

# String values on the x axis are treated as categories.
df = pd.DataFrame({"names":['A', 'B', 'C'], "values":[1,2,3]})
# Three panels showing the same categorical data as bars, points and a line.
plt.figure(figsize=(9, 3))
plt.subplot(131)
plt.bar("names", "values", data=df)
## <BarContainer object of 3 artists>
plt.subplot(132)
plt.scatter("names", "values", data=df)
plt.subplot(133)
plt.plot("names", "values", data=df)
# suptitle() sets one title for the whole figure rather than a single panel.
plt.suptitle('Categorical Plotting')
plt.show()

Bar plot

df = pd.DataFrame({"names":['A', 'B', 'C'], "values":[1,2,3]})
# With a 1x2 grid, axes is a one-dimensional array of two Axes.
fig, axes = plt.subplots(1,2,figsize=(6, 3))
# bar() draws vertical bars; barh() draws the same data horizontally.
df.plot.bar("names", "values", ax=axes[0], color="b")
df.plot.barh("names", "values", ax=axes[1], color="g")

Bar plot with subgroups

# 4 groups (rows a-d) x 3 states (columns). A seeded generator keeps the bar
# heights reproducible, in line with the other examples in these slides.
pd1 = pd.DataFrame(np.random.default_rng(32608).random((4, 3)), index = list("abcd"), columns = ["Florida", "Texas", "Utah"])
# pandas uses columns.name as the legend title and index.name as the label of
# the category axis. The columns hold the states, so "States" belongs on the
# columns; the rows a-d are the groups being compared.
pd1.columns.name="States"
pd1.index.name="Group"

fig = plt.figure(figsize=(6, 3))
ax = fig.add_subplot(1,2,1)
# Left: grouped vertical bars, one cluster per row and one bar per state.
pd1.plot.bar(ax = ax)

bx = fig.add_subplot(1,2,2)
# Right: horizontal bars with the states stacked end-to-end within each row.
pd1.plot.barh(ax = bx, stacked=True)
plt.show()

Histogram and density plot

# Re-seed so the histogram is reproducible.
rng = np.random.default_rng(32611)
data = rng.standard_normal(100)
# bins=20 splits the data range into 20 equal-width bins.
# hist() returns (counts, bin edges, bar container), echoed in the output below.
plt.hist(data, bins=20)
## (array([ 1.,  1.,  1.,  2.,  3.,  5., 14.,  9., 10.,  9., 13., 12.,  5.,
##         5.,  6.,  1.,  0.,  1.,  1.,  1.]), array([-2.8231562 , -2.53336371, -2.24357122, -1.95377874, -1.66398625,
##        -1.37419376, -1.08440128, -0.79460879, -0.50481631, -0.21502382,
##         0.07476867,  0.36456115,  0.65435364,  0.94414612,  1.23393861,
##         1.5237311 ,  1.81352358,  2.10331607,  2.39310855,  2.68290104,
##         2.97269353]), <BarContainer object of 20 artists>)
plt.show()

Histogram and density plot

# pyplot keeps every figure alive until it is explicitly closed. Release the
# figures left over from earlier examples so that opening a new one does not
# trigger the "More than 20 figures have been opened" RuntimeWarning.
plt.close("all")
# Wrap the sample in a one-column DataFrame to use the pandas plotting accessor.
data2 = pd.DataFrame({"data": data})
fig, axes = plt.subplots(1,3,figsize=(9, 3))
# Left: histogram of counts.
data2.plot.hist(bins=20, ax = axes[0])
# Middle: density curve on its own.
data2.plot.density(ax = axes[1])
# Right: density=True rescales the histogram to unit area so the density
# curve can be drawn over it on the same scale.
data2.plot.hist(bins=20,density=True, ax = axes[2])
data2.plot.density(ax = axes[2])
plt.show()

heatmap

# Seed the global NumPy state so both random images are reproducible.
np.random.seed(32608)
# Two stacked heatmaps. Pinning vmin/vmax gives both images the same colour
# scale, so a single colorbar describes both of them (otherwise each image
# autoscales to its own min/max).
plt.subplot(211)
plt.imshow(np.random.random((100, 100)), vmin=0, vmax=1)
plt.subplot(212)
im = plt.imshow(np.random.random((100, 100)), vmin=0, vmax=1)
# Dedicated Axes for the colorbar: [left, bottom, width, height] in figure
# fractions, placed to the right of the images.
cax = plt.axes([0.85, 0.1, 0.075, 0.8])
# Pass the mappable explicitly instead of relying on the "current image".
plt.colorbar(im, cax=cax)
## <matplotlib.colorbar.Colorbar object at 0x13d337700>
plt.show()

Reference