Introduction to Biostatistical Computing PHC 6937

Numpy Advanced

Zhiguang Huo (Caleb)

Wednesday Nov 9th, 2022

Outlines

reshape and flatten

import numpy as np

x = np.zeros(8).reshape(2,4)
x
## array([[0., 0., 0., 0.],
##        [0., 0., 0., 0.]])
x.reshape(4,-1)
## array([[0., 0.],
##        [0., 0.],
##        [0., 0.],
##        [0., 0.]])
x.flatten() 
## array([0., 0., 0., 0., 0., 0., 0., 0.])
x.ravel()
## array([0., 0., 0., 0., 0., 0., 0., 0.])

Concatenating

x = np.zeros(6).reshape(2,3); x
## array([[0., 0., 0.],
##        [0., 0., 0.]])
y = np.ones(6).reshape(2,3); y
## array([[1., 1., 1.],
##        [1., 1., 1.]])
np.concatenate([x,y],axis=0) ## axis=0 is default
## array([[0., 0., 0.],
##        [0., 0., 0.],
##        [1., 1., 1.],
##        [1., 1., 1.]])

np.concatenate([x,y],axis=1)
## array([[0., 0., 0., 1., 1., 1.],
##        [0., 0., 0., 1., 1., 1.]])

Concatenating

np.vstack([x,y]) 
## array([[0., 0., 0.],
##        [0., 0., 0.],
##        [1., 1., 1.],
##        [1., 1., 1.]])
np.hstack([x,y]) 
## array([[0., 0., 0., 1., 1., 1.],
##        [0., 0., 0., 1., 1., 1.]])

Concatenating

np.c_[x,y]
## array([[0., 0., 0., 1., 1., 1.],
##        [0., 0., 0., 1., 1., 1.]])
np.r_[x,y]
## array([[0., 0., 0.],
##        [0., 0., 0.],
##        [1., 1., 1.],
##        [1., 1., 1.]])

Splitting

rng = np.random.default_rng(32608)
data = rng.standard_normal((5,2))
np.split(data, [2]) ## default: axis = 0
## [array([[ 0.21984492, -1.28342596],
##        [ 1.31520866,  0.85478637]]), array([[-1.12501008, -2.36333542],
##        [ 1.72194583, -1.29984418],
##        [ 0.82499496,  0.55837717]])]
x, y = np.split(data, [2])
x
## array([[ 0.21984492, -1.28342596],
##        [ 1.31520866,  0.85478637]])
y
## array([[-1.12501008, -2.36333542],
##        [ 1.72194583, -1.29984418],
##        [ 0.82499496,  0.55837717]])

Splitting

x, y, z = np.split(data, [1,3])
x;y;z
## array([[ 0.21984492, -1.28342596]])
## array([[ 1.31520866,  0.85478637],
##        [-1.12501008, -2.36333542]])
## array([[ 1.72194583, -1.29984418],
##        [ 0.82499496,  0.55837717]])
a, b = np.split(data, [1],axis=1)
a;b 
## array([[ 0.21984492],
##        [ 1.31520866],
##        [-1.12501008],
##        [ 1.72194583],
##        [ 0.82499496]])
## array([[-1.28342596],
##        [ 0.85478637],
##        [-2.36333542],
##        [-1.29984418],
##        [ 0.55837717]])

Splitting

x, y, z = np.vsplit(data, [1,3])
x;y;z
## array([[ 0.21984492, -1.28342596]])
## array([[ 1.31520866,  0.85478637],
##        [-1.12501008, -2.36333542]])
## array([[ 1.72194583, -1.29984418],
##        [ 0.82499496,  0.55837717]])
a, b = np.hsplit(data, [1])
a;b 
## array([[ 0.21984492],
##        [ 1.31520866],
##        [-1.12501008],
##        [ 1.72194583],
##        [ 0.82499496]])
## array([[-1.28342596],
##        [ 0.85478637],
##        [-2.36333542],
##        [-1.29984418],
##        [ 0.55837717]])

repeat (1darray)

data = np.array([1,2,3])
data.repeat(2)
## array([1, 1, 2, 2, 3, 3])
data.repeat([2,3,4])
## array([1, 1, 2, 2, 2, 3, 3, 3, 3])

repeat (2darray)

data = np.arange(1,5).reshape(2,2)
data.repeat(2) ## default, flatten the array
## array([1, 1, 2, 2, 3, 3, 4, 4])
data.repeat(2, axis=0)
## array([[1, 2],
##        [1, 2],
##        [3, 4],
##        [3, 4]])
data.repeat(2, axis=1)
## array([[1, 1, 2, 2],
##        [3, 3, 4, 4]])
data.repeat([2,3], axis=1)
## array([[1, 1, 2, 2, 2],
##        [3, 3, 4, 4, 4]])

tile

data = np.arange(1,5).reshape(2,2)
np.tile(data, 2) 
## array([[1, 2, 1, 2],
##        [3, 4, 3, 4]])
np.tile(data, (2,1)) ## 2 rows and 1 column -- tiles
## array([[1, 2],
##        [3, 4],
##        [1, 2],
##        [3, 4]])
np.tile(data, (3,2)) 
## array([[1, 2, 1, 2],
##        [3, 4, 3, 4],
##        [1, 2, 1, 2],
##        [3, 4, 3, 4],
##        [1, 2, 1, 2],
##        [3, 4, 3, 4]])

Take and put (alternative to fancy indexing)

data = np.arange(0,100,10); data
## array([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])
inds = [9,5,2,7]
data[inds]
## array([90, 50, 20, 70])
data.take(inds)
## array([90, 50, 20, 70])

Take and put (alternative to fancy indexing)

data = np.arange(0,100,10); data
## array([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])
inds = [9,5,2,7]
data.put(inds, -99)
data
## array([  0,  10, -99,  30,  40, -99,  60, -99,  80, -99])
data.put(inds, [0,1,2,3])
data
## array([ 0, 10,  2, 30, 40,  1, 60,  3, 80,  0])

Take and put (2darray)

data = np.arange(12).reshape(3,-1)
ind = [1,1,0,0]
data.take(ind, axis=None) ## default
## array([1, 1, 0, 0])
data.take(ind, axis=0)
## array([[4, 5, 6, 7],
##        [4, 5, 6, 7],
##        [0, 1, 2, 3],
##        [0, 1, 2, 3]])
data.take(ind, axis=1)
## array([[1, 1, 0, 0],
##        [5, 5, 4, 4],
##        [9, 9, 8, 8]])

Broadcasting

Broadcasting is about arithmetic work between arrays with different shapes.

data = np.arange(6)
data * 5
## array([ 0,  5, 10, 15, 20, 25])
np.full(6,5)
## array([5, 5, 5, 5, 5, 5])
data * np.full(6,5)
## array([ 0,  5, 10, 15, 20, 25])

Broadcasting

data2d = np.arange(6).reshape(3,2)
vec1d = np.arange(2)
data2d + vec1d
## array([[0, 2],
##        [2, 4],
##        [4, 6]])
vec2d = np.tile(vec1d,(3,1))
vec2d
## array([[0, 1],
##        [0, 1],
##        [0, 1]])
data2d + vec2d
## array([[0, 2],
##        [2, 4],
##        [4, 6]])

Broadcasting

data2d = np.arange(6).reshape(3,2)
vec1d = np.arange(3).reshape(3,1)
data2d + vec1d
## array([[0, 1],
##        [3, 4],
##        [6, 7]])
vec2d = np.tile(vec1d,(1,2))
vec2d
## array([[0, 0],
##        [1, 1],
##        [2, 2]])
data2d + vec2d
## array([[0, 1],
##        [3, 4],
##        [6, 7]])

Broadcasting

data2d = np.arange(6).reshape(3,2)
vec1d = np.arange(3)
data2d + vec1d

Broadcasting (exercise)

data2d = np.arange(12).reshape(4,3)
data2d
## array([[ 0,  1,  2],
##        [ 3,  4,  5],
##        [ 6,  7,  8],
##        [ 9, 10, 11]])

Broadcasting (solution to the exercise)

data2d - data2d.mean(axis=1).reshape(4,1)
## array([[-1.,  0.,  1.],
##        [-1.,  0.,  1.],
##        [-1.,  0.,  1.],
##        [-1.,  0.,  1.]])
data2d - data2d.mean(axis=0)
## array([[-4.5, -4.5, -4.5],
##        [-1.5, -1.5, -1.5],
##        [ 1.5,  1.5,  1.5],
##        [ 4.5,  4.5,  4.5]])

create a new dimension

data = np.ones((3,3))
data_3d = data.reshape(3,1,3)
data_3d = data[:,np.newaxis,:]
data_3d.shape
## (3, 1, 3)

3d array

rng = np.random.default_rng(32608)
data3d = rng.standard_normal((2,3,4))
data3d.shape
## (2, 3, 4)
depth_means = data3d.mean(axis=2) ## row, column, depth
depth_means
## array([[ 0.2766035 , -0.76656096, -0.01555895],
##        [-0.7650591 ,  0.12205003,  0.13344552]])
demeaned = data3d - depth_means[:,:,np.newaxis]
demeaned.mean(2)
## array([[ 0.00000000e+00, -5.55111512e-17,  1.11022302e-16],
##        [ 0.00000000e+00, -2.77555756e-17,  1.38777878e-17]])

data aggregation

data = np.arange(1,5)
np.add(data[0], data[1])
## 3
np.add.reduce(data)
## 10
np.add.accumulate(data)
## array([ 1,  3,  6, 10])
np.cumsum(data)
## array([ 1,  3,  6, 10])
np.multiply.reduce(data)
## 24
np.multiply.accumulate(data)
## array([ 1,  2,  6, 24])
np.cumprod(data)
## array([ 1,  2,  6, 24])

numpy file ave and load

data = np.arange(1,10)
np.save("my_data.npy", data)
np.load("my_data.npy")
data1 = np.arange(1,10); data2 = np.arange(2,11); 
np.savez("my_data2.npz", x=data1, y=data2)
dat = np.load("my_data2.npz")
dat["x"]
dat["y"]

Reference