NumPy 入门

NumPy 笔记目录

  • [Array Creation](#Array Creation)
  • [Basic Operations](#Basic Operations)
  • [Indexing, Slicing and Iterating](#Indexing, Slicing and Iterating)
  • [Shape Manipulation](#Shape Manipulation)
    • [Changing the shape](#Changing the shape)
    • [Stacking together](#Stacking together)
    • [Splitting](#Splitting)
    • [Copy and Views](#Copy and Views)

Array Creation

np.arange

1
2
3
4
>>> import numpy as np
>>> a = np.arange(15).reshape(3, 5)
>>> np.arange(10, 30, 5)
array([10, 15, 20, 25])

np.array

1
2
3
4
5
6
7
>>> import numpy as np
>>> a = np.array([(2,3,4), (3,4,5)]) or
>>> a = np.array([ [2,3,4], [3,4,5] ])
>>> b = np.array([ [1,2], [3,4] ], dtype=complex)
>>> np.zeros(5)
>>> np.zeros((3,4))
>>> np.ones( (2,3,4), dtype=np.int16 ) # dtype can also be specified

Basic Operations

dot

1
2
>>> A.dot(B) or
>>> np.dot(A, B)

axis

1
2
3
4
5
>>> b = np.arange(12).reshape(3,4)
array(
[[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11]])
1
2
>>> b.sum(axis=0)  # sum of each column
array([12, 15, 18, 21])
1
2
>>> b.min(axis=1)  # min of each row
array([0, 4, 8])
1
2
3
4
5
>>> b.cumsum(axis=1)  # cumulative sum along each row
array(
[[ 0, 1, 3, 6],
[ 4, 9, 15, 22],
[ 8, 17, 27, 38]])

Indexing, Slicing and Iterating

One-dimensional

1
2
>>> a = np.arange(10)**3
array([ 0, 1, 8, 27, 64, 125, 216, 343, 512, 729])
1
2
>>> a[: 6 : 2] = -1000   # equivalent to a[0:6:2] = -1000; from start to position 6, exclusive, set every 2nd element (每隔一个元素) to -1000
array([ -1000, 1, -1000, 27, -1000, 125, 216, 343, 512, 729])
1
2
>>> a[ : :-1]  # reversed a
array([ 729, 512, 343, 216, 125, -1000, 27, -1000, 1, -1000])

Multidimensional

1
2
>>> def f(x,y):
...     return 10*x+y
1
2
3
4
5
6
7
8
>>> b = np.fromfunction(f,(5,4),dtype=int)
>>> b
array(
[[ 0, 1, 2, 3],
[10, 11, 12, 13],
[20, 21, 22, 23],
[30, 31, 32, 33],
[40, 41, 42, 43]])
1
2
3
4
>>> b[1:3, : ]   # each column in the second and third row of b
array(
[[10, 11, 12, 13],
[20, 21, 22, 23]])
1
2
3
4
>>> c = np.array( [[[  0,  1,  2],
... [ 10, 12, 13]],
... [[100,101,102],
... [110,112,113]]])
1
2
>>> c.shape
(2, 2, 3)
1
2
3
4
>>> c[1,...]     # same as c[1,:,:] or c[1]
array(
[[100, 101, 102],
[110, 112, 113]])
1
2
3
4
>>> c[...,2]       # same as c[:,:,2]
array(
[[ 2, 13],
[102, 113]])

Iterating

1
2
3
4
5
6
7
8
9
>>> for row in b:
...     print(row)
[0 1 2 3]
[10 11 12 13]
[20 21 22 23]
[30 31 32 33]
[40 41 42 43]
>>> for element in b.flat:
...     print(element) # this will print every element.

key words: flat

Indexing with Arrays

1
2
3
4
5
6
>>> a = np.arange(12).reshape(3,4)
>>> a
array(
[[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11]])
1
2
3
4
>>> i = np.array([ [0,1],
...                [1,2] ] ) # indices for the first dim of a
>>> j = np.array( [ [2,1],
...                 [3,3] ] ) # indices for the second dim
1
2
3
4
5
6
7
>>> a[i,j]  # i and j must have equal shape
array(
[[ 2, 5],
[ 7, 11]])
组合顺序如下:
(0, 2), (1, 1)
(1, 3), (2, 3)
1
2
3
4
5
6
7
>>> a[i] # equal to a[i, :]
array(
[[[0, 1, 2, 3],
[4, 5 ,6, 7]],

[[4, 5, 6, 7],
[8, 9,10,11]]])
1
2
3
>>> a[i,2]
array([[ 2, 6],
[ 6, 10]])
1
2
3
4
5
6
7
8
>>> a[:,j]   # i.e., a[ : , j]
array(
[[[ 2, 1],
[ 3, 3]],
[[ 6, 5],
[ 7, 7]],
[[10, 9],
[11, 11]]])
1
2
3
4
5
>>> l = [i,j]
>>> a[tuple(l)] # equivalent to a[i,j]; 注意:NumPy 1.23 起,直接用列表 a[l] 会被当作 a[np.array(l)],不再等价于 a[i,j],需先转换为 tuple
array(
[[ 2, 5],
[ 7, 11]])
1
2
3
4
5
6
7
8
>>> s = np.array( [i,j] ) # s.shape = (2, 2, 2)
>>> a[s] # not what we want
Traceback (most recent call last): File "<stdin>", line 1, in ?
IndexError: index (3) out of range (0<=index<=2) in dimension 0
>>> a[tuple(s)] # same as a[i,j]
array(
[[ 2, 5],
[ 7, 11]])
1
2
3
4
5
6
7
8
9
10
11
>>> time = np.linspace(20, 145, 5)    # time scale
>>> data = np.sin(np.arange(20)).reshape(5,4)
>>> time
array([ 20.  , 51.25, 82.5 , 113.75, 145.  ])
>>> data
array(
[[ 0.        , 0.84147098, 0.90929743, 0.14112001],
[-0.7568025 , -0.95892427, -0.2794155 , 0.6569866 ],
[ 0.98935825, 0.41211849, -0.54402111, -0.99999021],
[-0.53657292, 0.42016704, 0.99060736, 0.65028784],
[-0.28790332, -0.96139749, -0.75098725, 0.14987721]])
1
2
3
>>> ind = data.argmax(axis=0)  # index of the maxima for each series
>>> ind
array([2, 0, 3, 1]) # 位置
1
2
>>> time_max = time[ind]    # times corresponding to the maxima
>>> data_max = data[ind, range(data.shape[1])] # => data[ind[0],0], data[ind[1],1], ... 等价于 data[ind, range(4)]
1
2
3
4
>>> time_max
array([ 82.5 , 20.  , 113.75, 51.25])
>>> data_max
array([ 0.98935825, 0.84147098, 0.99060736, 0.6569866 ])
1
2
>>> np.all(data_max == data.max(axis=0))
True

key words: data.argmax(axis=0), data.max(axis=0)

np.ix_()

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
>>> a = np.array([2,3,4,5])
>>> b = np.array([8,5,4])
>>> c = np.array([5,4,6,8,3])
>>> ax,bx,cx = np.ix_(a,b,c)
>>> ax
array(
[[[2]],
[[3]],
[[4]],
[[5]]])
>>> bx
array(
[[[8],
[5],
[4]]])
>>> cx
array(
[[[5, 4, 6, 8, 3]]])
>>> ax.shape, bx.shape, cx.shape
((4, 1, 1), (1, 3, 1), (1, 1, 5))
1
2
3
4
5
6
7
8
9
10
11
12
13
14
>>> a = np.arange(10).reshape(2,5)
>>> a
array(
[[0, 1, 2, 3, 4],
[5, 6, 7, 8, 9]])
>>> ixgrid = np.ix_([0,1], [2,4])
>>> ixgrid
(array([[0],
[1]]), array([[2, 4]]))
>>> ixgrid[0].shape, ixgrid[1].shape
((2,1), (1,2))
>>> a[ixgrid]
array([[2, 4],
[7, 9]])

key words: np.ix_(),可以发现从 ax 到 cx,分别对应 4,3,5。

Shape Manipulation

Changing the shape

1
2
3
4
5
6
>>> a = np.floor(10*np.random.random((3,4)))
>>> a
array(
[[ 2., 8., 0., 6.],
[ 4., 5., 1., 1.],
[ 8., 9., 3., 6.]])
1
2
>>> a.ravel() # flatten the array
array([ 2., 8., 0., 6., 4., 5., 1., 1., 8., 9., 3., 6.])
1
2
3
4
5
>>> a.resize((2,6))
>>> a
array(
[[ 2., 8., 0., 6., 4., 5.],
[ 1., 1., 8., 9., 3., 6.]])
1
2
3
4
5
>>> a.reshape(3,-1) # equal to a.reshape(3, 4)
array(
[[ 2., 8., 0., 6.],
[ 4., 5., 1., 1.],
[ 8., 9., 3., 6.]])
1
2
3
4
>>> a = np.arange(30)
>>> a.shape = 2,-1,3 # -1 means "whatever is needed"
>>> a.shape
(2, 5, 3)

key words: ravel, reshape, resize

Stacking together

1
2
3
4
5
>>> a = np.floor(10*np.random.random((2,2)))
>>> a
array(
[[ 8., 8.],
[ 0., 0.]])
1
2
3
4
5
>>> b = np.floor(10*np.random.random((2,2)))
>>> b
array(
[[ 1., 8.],
[ 0., 4.]])
1
2
3
4
5
6
>>> np.vstack((a,b))  # vertical stack
array(
[[ 8., 8.],
[ 0., 0.],
[ 1., 8.],
[ 0., 4.]])
1
2
3
4
>>> np.hstack((a,b))  # horizontal stack
array(
[[ 8., 8., 1., 8.],
[ 0., 0., 0., 4.]])
1
2
3
4
5
>>> from numpy import newaxis
>>> np.column_stack((a,b)) # With 2D arrays, it's equal to np.hstack((a, b))
array(
[[ 8., 8., 1., 8.],
[ 0., 0., 0., 4.]])
1
2
3
>>> a = np.array([4.,2.])  # a.shape = (2,)
array([4., 2.])
>>> b = np.array([3.,8.])
1
2
3
4
>>> a[:, newaxis]  # This gives a 2D column vector: a[:, newaxis].shape = (2, 1), while a.shape is still (2,)
array(
[[ 4.],
[ 2.]])
1
2
3
4
>>> np.column_stack((a[:,newaxis],b[:,newaxis]))  # equal to np.column_stack((a, b)) or np.vstack((a, b)).T
array(
[[ 4., 3.],
[ 2., 8.]])
1
2
3
4
5
6
>>> np.vstack((a[:,newaxis],b[:,newaxis])) # The behavior of vstack is different
array(
[[ 4.],
[ 2.],
[ 3.],
[ 8.]])
1
2
3
4
>>> np.vstack((a, b))
array(
[[ 4., 2.],
[ 3., 8.]])
1
2
3
4
5
>>> x = np.arange(0,10,2)  # x=([0,2,4,6,8])
>>> y = np.arange(5) # y=([0,1,2,3,4])
>>> m = np.vstack([x,y]) # m=([[0,2,4,6,8],
                         #     [0,1,2,3,4]])
>>> xy = np.hstack([x,y]) # xy =([0,2,4,6,8,0,1,2,3,4])
1
2
3
In complex cases, r_ and c_ are useful for creating arrays by stacking numbers along one axis. They allow the use of range literals (":"):
>>> np.r_[1:4,0,4]
array([1, 2, 3, 0, 4])

key words: hstack, vstack, newaxis,注意 Vector 的连接方法。

Splitting

1
2
3
4
5
>>> a = np.floor(10*np.random.random((2,12)))
>>> a
array(
[[ 9., 5., 6., 3., 6., 8., 0., 7., 9., 7., 2., 7.],
[ 1., 4., 9., 2., 2., 1., 0., 6., 2., 2., 4., 0.]])
1
2
3
4
5
6
7
8
9
10
>>> np.hsplit(a,3)   # Split a into 3
[array(
[[ 9., 5., 6., 3.],
[ 1., 4., 9., 2.]]),
array(
[[ 6., 8., 0., 7.],
[ 2., 1., 0., 6.]]),
array(
[[ 9., 7., 2., 7.],
[ 2., 2., 4., 0.]])]
1
2
3
4
5
6
7
8
9
10
>>> np.hsplit(a,(3,4))   # Split a after the third and the fourth column
[array(
[[ 9., 5., 6.],
[ 1., 4., 9.]]),
array(
[[ 3.],
[ 2.]]),
array(
[[ 6., 8., 0., 7., 9., 7., 2., 7.],
[ 2., 1., 0., 6., 2., 2., 4., 0.]])]

key words: hsplit

Copy and Views

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
>>> a = np.arange(12).reshape(3,4).copy()  # 3x4 数组;copy() 保证 a 拥有自己的数据,这样下面的 c.base is a 才为 True
>>> c = a.view()
>>> c is a
False
>>> c.base is a # c is a view of the data owned by a
True
>>> c.flags.owndata
False
>>>
>>> c.shape = 2,6 # a's shape doesn't change
>>> a.shape
(3, 4)
>>> c[0,4] = 1234 # a's data changes
>>> a
array(
[[ 0, 1, 2, 3],
[1234, 5, 6, 7],
[ 8, 9, 10, 11]])
1
2
3
4
5
6
7
8
9
10
11
>>> d = a.copy()  # a new array object with new data is created
>>> d is a
False
>>> d.base is a # d doesn't share anything with a
False
>>> d[0,0] = 9999
>>> a
array(
[[ 0, 1, 2, 3],
[1234, 5, 6, 7],
[ 8, 9, 10, 11]])

key words: view, base, copy
view 不复制 data(与原数组共享同一份数据,所以修改 c 的元素会改变 a),但拥有自己独立的 shape(所以修改 c.shape 不影响 a.shape);copy 则完全复制一份新的数据,与原数组互不影响。

引自官方手册