import numpy as np

# Build a 1-D array from a sequence of three integers and display it.
a = np.array((1, 2, 3))
print(a)

[1 2 3]

# A 2x3 array filled with integer ones (np.ones defaults to float otherwise).
b = np.ones(shape=(2, 3), dtype=int)
print(b)

[[1 1 1]
 [1 1 1]]

# A 3x2 array of zeros; with no dtype given the elements are floats.
c = np.zeros(shape=(3, 2))
print(c)

[[0. 0.]
 [0. 0.]
 [0. 0.]]

# np.arange parameters: start, stop (exclusive), step.
# Spelled out here with the defaults start=0 and step=1.
d = np.arange(0, 10, 1)
print(d)

[0 1 2 3 4 5 6 7 8 9]

# Tile the scalar 3 into a grid of 3 rows by 6 columns.
e1 = np.tile(3, reps=(3, 6))

# Tile the pair [1, 2]: 3 rows, each row being [1, 2] repeated 4 times.
e2 = np.tile([1, 2], reps=(3, 4))

# Print both results with one blank line between them.
print(e1, e2, sep='\n\n')

[[3 3 3 3 3 3]
 [3 3 3 3 3 3]
 [3 3 3 3 3 3]]

[[1 2 1 2 1 2 1 2]
 [1 2 1 2 1 2 1 2]
 [1 2 1 2 1 2 1 2]]

# A single repeat count applies to every element: each value appears 5 times.
f1 = np.repeat([1, 2], repeats=5)

# A list of counts applies per element: 1 five times, then 2 three times.
f2 = np.repeat([1, 2], repeats=[5, 3])

print(f1, f2, sep='\n')

[1 1 1 1 1 2 2 2 2 2]
[1 1 1 1 1 2 2 2]

# Ten evenly spaced floats from 1 to 3, both endpoints included.
g = np.linspace(start=1, stop=3, num=10)
print(g)

[1.         1.22222222 1.44444444 1.66666667 1.88888889 2.11111111
 2.33333333 2.55555556 2.77777778 3.        ]

# Build a 3x2 matrix from a string: commas separate the entries of a row,
# semicolons separate the rows.
# NOTE(review): the NumPy docs discourage np.matrix in new code — consider a
# plain 2-D np.array if matrix-specific behaviour is not needed.
h = np.matrix('1, 2; 3, 4; 5, 6')
print(h)

[[1 2]
 [3 4]
 [5 6]]

Structured NumPy Arrays:

source: https://jakevdp.github.io/PythonDataScienceHandbook/02.09-structured-data-numpy.html

The first (optional) character is < or >, which means "little endian" or "big endian," respectively, and specifies the byte-ordering convention (which byte of a multi-byte value is stored first). The next character specifies the type of data: characters, bytes, ints, floating points, and so on (see the table below). The last character or characters represent the size of the object in bytes.

Character Description Example

'b' Byte np.dtype('b')

'i' Signed integer np.dtype('i4') == np.int32

'u' Unsigned integer np.dtype('u1') == np.uint8

'f' Floating point np.dtype('f8') == np.float64

'c' Complex floating point np.dtype('c16') == np.complex128

'S', 'a' String np.dtype('S5')

'U' Unicode string np.dtype('U') == np.str_

'V' Raw data (void) np.dtype('V') == np.void

# Structured dtype described by a dict of parallel field names and formats.
dt1 = np.dtype({
    'names': ('name', 'age', 'weight'),
    'formats': ('U10', 'i4', 'f8'),
})

# A compound type written instead as a list of (field name, format) tuples.
dt2 = np.dtype([
    ('name', 'S10'),
    ('age', 'i4'),
    ('weight', 'f8'),
])

# When the field names do not matter, a comma-separated format string is enough.
dt3 = np.dtype('S10,i4,f8')

# An 8-byte integer 'id' plus a 'mat' field holding a 2x3 matrix of 8-byte floats.
dt4 = np.dtype([('id', 'i8'), ('mat', 'f8', (2, 3))])

print(dt4)

[('id', '<i8'), ('mat', '<f8', (2, 3))]

# Two zero-initialised records of the dt4 structured type.
act = np.zeros(2, dtype=dt4)

# Show, in turn, the whole array, its first record, and that record's 'mat'
# field; each is followed by a blank line.
for shown in (act, act[0], act[0]['mat']):
    print(shown, end='\n\n')

[(0, [[0., 0., 0.], [0., 0., 0.]]) (0, [[0., 0., 0.], [0., 0., 0.]])]

(0, [[0., 0., 0.], [0., 0., 0.]])

[[0. 0. 0.]
 [0. 0. 0.]]

name = ['Alice', 'Bob', 'Cathy', 'Doug']
age = [25, 45, 37, 19]
weight = [55.0, 85.5, 68.0, 61.5]

# Four zero-filled records with a unicode name, a 4-byte int age and an
# 8-byte float weight.
i = np.zeros(
    4,
    dtype={
        'names': ('name', 'age', 'weight'),
        'formats': ('U10', 'i4', 'f8'),
    },
)

# Fill the structured array one field (column) at a time.
for field, column in (('name', name), ('age', age), ('weight', weight)):
    i[field] = column

# Whole array, then one field across all records, then a single record.
for shown in (i, i['name'], i[0]):
    print(shown, end='\n\n')

[('Alice', 25, 55. ) ('Bob', 45, 85.5) ('Cathy', 37, 68. )
 ('Doug', 19, 61.5)]

['Alice' 'Bob' 'Cathy' 'Doug']

('Alice', 25, 55.)

Broadcasting in NumPy:

Rules of broadcasting:
When operating on two arrays, NumPy compares their shapes element-wise. It starts with the trailing dimensions and works its way forward. Two dimensions are compatible when

- they are equal, or
- one of them is 1.

j1 = np.array((1, 2, 3))
j2 = np.array((4, 5, 6))

# Equal-shaped arrays combine element by element; a scalar operand is applied
# to every element.
for label, result in (
    ('j1 + j2 =', j1 + j2),
    ('j1 + 1 =', j1 + 1),
    ('j1 * 10 =', j1 * 10),
):
    print(label, result)

j3 = np.arange(15).reshape((5, 3))
j4 = np.arange(5).reshape((5, 1))

# The (5, 1) column is broadcast across the three columns of the (5, 3) array.
print('\nj3 + j4 = \n', j3 + j4)

j1 + j2 = [5 7 9]
j1 + 1 = [2 3 4]
j1 * 10 = [10 20 30]

j3 + j4 = 
 [[ 0  1  2]
 [ 4  5  6]
 [ 8  9 10]
 [12 13 14]
 [16 17 18]]

# Shapes are compared axis by axis: (6, 3, 1, 3, 3) against (1, 3, 2, 1, 3).
# In every position the two sizes are either equal or one of them is 1.
j5 = np.ones((6, 3, 1, 3, 3))
j6 = np.ones((1, 3, 2, 1, 3))

print(j5 + j6)  # the result has shape (6, 3, 2, 3, 3)

[[[[[2. 2. 2.]
    [2. 2. 2.]
    [2. 2. 2.]]

   [[2. 2. 2.]
    [2. 2. 2.]
    [2. 2. 2.]]]


  [[[2. 2. 2.]
    [2. 2. 2.]
    [2. 2. 2.]]

   [[2. 2. 2.]
    [2. 2. 2.]
    [2. 2. 2.]]]


  [[[2. 2. 2.]
    [2. 2. 2.]
    [2. 2. 2.]]

   [[2. 2. 2.]
    [2. 2. 2.]
    [2. 2. 2.]]]]


 [[[[2. 2. 2.]
    [2. 2. 2.]
    [2. 2. 2.]]

   [[2. 2. 2.]
    [2. 2. 2.]
    [2. 2. 2.]]]


  [[[2. 2. 2.]
    [2. 2. 2.]
    [2. 2. 2.]]

   [[2. 2. 2.]
    [2. 2. 2.]
    [2. 2. 2.]]]


  [[[2. 2. 2.]
    [2. 2. 2.]
    [2. 2. 2.]]

   [[2. 2. 2.]
    [2. 2. 2.]
    [2. 2. 2.]]]]


 [[[[2. 2. 2.]
    [2. 2. 2.]
    [2. 2. 2.]]

   [[2. 2. 2.]
    [2. 2. 2.]
    [2. 2. 2.]]]


  [[[2. 2. 2.]
    [2. 2. 2.]
    [2. 2. 2.]]

   [[2. 2. 2.]
    [2. 2. 2.]
    [2. 2. 2.]]]


  [[[2. 2. 2.]
    [2. 2. 2.]
    [2. 2. 2.]]

   [[2. 2. 2.]
    [2. 2. 2.]
    [2. 2. 2.]]]]


 [[[[2. 2. 2.]
    [2. 2. 2.]
    [2. 2. 2.]]

   [[2. 2. 2.]
    [2. 2. 2.]
    [2. 2. 2.]]]


  [[[2. 2. 2.]
    [2. 2. 2.]
    [2. 2. 2.]]

   [[2. 2. 2.]
    [2. 2. 2.]
    [2. 2. 2.]]]


  [[[2. 2. 2.]
    [2. 2. 2.]
    [2. 2. 2.]]

   [[2. 2. 2.]
    [2. 2. 2.]
    [2. 2. 2.]]]]


 [[[[2. 2. 2.]
    [2. 2. 2.]
    [2. 2. 2.]]

   [[2. 2. 2.]
    [2. 2. 2.]
    [2. 2. 2.]]]


  [[[2. 2. 2.]
    [2. 2. 2.]
    [2. 2. 2.]]

   [[2. 2. 2.]
    [2. 2. 2.]
    [2. 2. 2.]]]


  [[[2. 2. 2.]
    [2. 2. 2.]
    [2. 2. 2.]]

   [[2. 2. 2.]
    [2. 2. 2.]
    [2. 2. 2.]]]]


 [[[[2. 2. 2.]
    [2. 2. 2.]
    [2. 2. 2.]]

   [[2. 2. 2.]
    [2. 2. 2.]
    [2. 2. 2.]]]


  [[[2. 2. 2.]
    [2. 2. 2.]
    [2. 2. 2.]]

   [[2. 2. 2.]
    [2. 2. 2.]
    [2. 2. 2.]]]


  [[[2. 2. 2.]
    [2. 2. 2.]
    [2. 2. 2.]]

   [[2. 2. 2.]
    [2. 2. 2.]
    [2. 2. 2.]]]]]

Indexing

k1 = np.arange(3)
# np.newaxis is an alias for None, so both index expressions add a trailing
# axis and print the same (3, 1) column.
print(k1[:, np.newaxis], k1[:, None], sep='\n')

[[0]
 [1]
 [2]]
[[0]
 [1]
 [2]]

k2 = np.arange(50).reshape((10, 5, 1))
# The ellipsis stands for all remaining axes: this picks index 2 on the first
# axis and keeps the other two whole (the same selection as k2[2] or
# k2[2, :, :]).
print(k2[2, ...])

[[10]
 [11]
 [12]
 [13]
 [14]]

# Start from shape (5, 1, 1, 5, 1), then squeeze out every length-1 axis,
# which leaves a plain (5, 5) array.
k3 = np.arange(25).reshape((5, 1, 1, 5, 1)).squeeze()
print(k3)

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]
 [15 16 17 18 19]
 [20 21 22 23 24]]

# Show the shape of k3 as a tuple of axis lengths.
print(np.shape(k3))

(5, 5)

Printing the diagonal of the matrix, i.e. the elements at positions (0, 0), (1, 1), (2, 2), ...

# Index arrays are paired up element by element: (rows[0], cols[0]),
# (rows[1], cols[1]), ... so identical row and column lists pick the diagonal.
rows = [0, 1, 2, 3, 4]
cols = [0, 1, 2, 3, 4]
print(k3[rows, cols])

[ 0  6 12 18 24]

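Changes made through a sliced variable:

A slice is a view of the original array, so changes made through the sliced variable also change the original array.

This is useful when an array is very large: slicing gives a view onto just the part we need, so we can operate on it through the new variable without copying the data.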

# Take the first two rows of k3 as a slice.
k4 = k3[:2]
# Write through the slice; printing both arrays afterwards shows whether the
# change is also visible in k3.
k4[0, 0] = 100
print(k4, k3, sep='\n')

[[100   1   2   3   4]
 [  5   6   7   8   9]]
[[100   1   2   3   4]
 [  5   6   7   8   9]
 [ 10  11  12  13  14]
 [ 15  16  17  18  19]
 [ 20  21  22  23  24]]

# Combine a strided slice (every second row) with a list index (columns 1 and 2).
print(k3[slice(None, None, 2), [1, 2]])

[[ 1  2]
 [11 12]
 [21 22]]

np.ix_() function

# np.ix_ turns the two index lists into a (2, 1) column and a (1, 3) row.
# Indexing k3 with that pair selects every combination of rows 0, 2 with
# columns 0, 2, 4.
l = np.ix_([0, 2], [0, 2, 4])
print(l, k3[l], sep='\n')

(array([[0],
       [2]]), array([[0, 2, 4]]))
[[100   2   4]
 [ 10  12  14]]

Question:

1. Add two arrays using broadcasting.
2. Access the first and last element of every row. # arr[table][row][column]

m1 = np.arange(25).reshape((5, 5))
m2 = np.arange(75).reshape((5, 5, 3))

# Give m1 a trailing length-1 axis so its (5, 5, 1) shape broadcasts against
# m2's (5, 5, 3).
answer = m1[:, :, np.newaxis] + m2

# NOTE(review): this selects rows 0, 2 and 4 of every table. The first and
# last element of every row would be answer[:, :, [0, -1]] — confirm which
# selection is intended.
print(answer[:, [0, 2, 4]])

[[[ 0  1  2]
  [ 8  9 10]
  [16 17 18]]

 [[20 21 22]
  [28 29 30]
  [36 37 38]]

 [[40 41 42]
  [48 49 50]
  [56 57 58]]

 [[60 61 62]
  [68 69 70]
  [76 77 78]]

 [[80 81 82]
  [88 89 90]
  [96 97 98]]]