# CS682 Discussion Session 01: Slicing and Broadcasting in Python

## 1. Python List and Numpy Array (ndarray)

### 1.1 Difference between List and Numpy Array
- A List is an ordered collection of items. The items can be of mixed types: numbers, strings, other lists, NumPy arrays, etc.  
- A NumPy array is a grid of values that all share the same data type.

In [None]:
# A list may mix element types freely: an int, a str and a nested list here.
my_list = [1, '2', [3]]
my_list += [4.01]  # += extends the list with the items of the right-hand list
my_list.append(4)
my_list.append([5,6])  # append adds its argument as ONE item, so this nests a list
my_list += [7,8,9]  # three separate items are added
print('my_list:', my_list)
print(type(my_list))

In [None]:
l1 = [0, 1, 2, 3, 4]
# List comprehension: build a new list holding the square of every item of l1.
s_l1 = [x**2 for x in l1]
print('s_l1:', s_l1)

# Same comprehension with a filter: only the even items of l1 are squared.
s_l1 = [x**2 for x in l1 if x % 2 == 0]
print('s_l1:', s_l1)

In [None]:
import numpy as np
# An ndarray holds a single dtype, so NumPy has to pick one common type for
# the str and the int passed in; the dtype print below shows which one it chose.
my_arr = np.array(['hello', 1])
print('my_arr:', my_arr)
print(type(my_arr))
print('shape:', my_arr.shape)
print('dtype:', my_arr.dtype)
# Intentional failure for the demo: after the conversion above the element is
# expected to be a string rather than an int, so adding 3 raises.
my_arr[1]+3 # Error

### 1.2 len, size, shape, indexing

In [None]:
l = [[0, 1, 2, 3], [4, 5, 6, 7]]  # nested list: 2 inner lists of 4 items
a = np.arange(8).reshape((2,4))  # the same values as a 2x4 ndarray
print(l)
print(len(l))  # number of inner lists
print('----------')
print(a)
print(len(a))  # length of the first axis
print(a.size)  # total number of elements
print(a.shape)  # length of every axis, as a tuple

In [None]:
# NOTE: relies on `a` (ndarray) and `l` (nested list) defined in an earlier cell.
print(a[1], type(a[1]))
print(a[1][2])  # index twice: take row 1, then item 2 of that row
print(a[1, 2])  # one multi-axis index; an ndarray accepts a tuple of indices
print('----------')
print(l[1][2])
# Intentional failure for the demo: a list cannot be indexed with a tuple.
print(l[1, 2]) #TypeError

### 1.3 Transfer between List and Numpy Array

- List to Numpy Array: a = np.array(l) or a = np.asarray(l)  
- Numpy Array to List: l = a.tolist()

In [None]:
l = [[0, 1, 2, 3], [4, 5, 6, 7]]
a = np.array(l)
# asarray skips the copy only when its input is already a suitable ndarray;
# a list, as here, is always converted into a new array.
a1 = np.asarray(l)
print('a:', a, type(a))
print('a1:', a1, type(a1))


In [None]:
# NOTE: relies on the 2-D array `a` defined in an earlier cell.
# list(a) iterates over the first axis only, so the items are still ndarrays.
l1 = list(a)
print('l1:', type(l1), l1)
print('l1[0]:', type(l1[0]))
print('--------')

# tolist() converts recursively, so the items are plain Python lists.
l2 = a.tolist()
print('l2:', type(l2), l2)
print('l2[0]:', type(l2[0]))


## 2. Slicing

### 2.1 Basic usage

In [None]:
a = np.arange(12)  # 1-D array holding 0..11
print(a)
print(a.shape)
print('-------')
a = np.arange(12).reshape((3,4))  # the same 12 values arranged as 3 rows x 4 columns
print(a)
print(a.shape)

In [None]:
# NOTE: relies on the 3x4 array `a` defined in an earlier cell.
# An integer index drops its axis; a slice keeps it (compare the printed shapes).
row1 = a[1, :]  # integer on axis 0 -> 1-D result
row2 = a[1]  # trailing axes are taken in full, so this matches row1
row3 = a[1:2, :]  # slice on axis 0 -> result stays 2-D with a single row
row4 = a[1:]  # every row from index 1 to the end
print('row1\n', row1, row1.shape )
print('row2\n', row2, row2.shape)
print('row3\n', row3, row3.shape)
print('row4\n', row4, row4.shape)
print('-------')

In [None]:
# NOTE: relies on the 3x4 array `a` defined in an earlier cell.
col1 = a[:, -2:]  # negative start counts from the end: the last two columns
col2 = a[:, 0:-1:2]  # start:stop:step -> every second column, stopping before the last one
print('col1\n', col1, col1.shape) 
print('col2\n', col2, col2.shape)
print('-------')

# Axes that are not mentioned are taken in full, so both forms select everything.
a1 = a[:]
a2 = a[:, :]
print(a1.shape)
print(a2.shape)

In [None]:
# 3-D example array: 24 consecutive values arranged with shape (2, 3, 4).
a = np.arange(24).reshape((2, 3, 4))
print('a.shape = ', a.shape)
print(a)

In [None]:
# NOTE: relies on the (2, 3, 4) array `a` defined in an earlier cell.
# Every axis is indexed with a slice, so the result keeps all three axes.
a1 = a[1:2, 0:3, 1:3]
print('a1.shape =', a1.shape)
print(a1)

In [None]:
# NOTE: relies on the (2, 3, 4) array `a` defined in an earlier cell.
# The integer index on the first axis drops that axis; the two slices keep theirs.
a2 = a[1, 0:3, 1:3]
print('a2.shape =', a2.shape)
print(a2)

In [None]:
# NOTE: relies on the (2, 3, 4) array `a` defined in an earlier cell.
# -2 is an integer index (axis dropped); -1: is a slice (axis kept with length 1).
a3 = a[0:2, -2, -1:]
print('a3.shape =', a3.shape)
print(a3)

In [None]:
# NOTE: relies on the (2, 3, 4) array `a` defined in an earlier cell.
# Two integer indices drop two axes, leaving only the sliced first axis.
a4 = a[0:2, -2, -1]
print('a4.shape =', a4.shape)
print(a4)


### 2.2 Change of dimensions

In [None]:
a = np.arange(24).reshape((2, 3, 4))
print('a.shape =', a.shape)
print(a)
# Left disabled on purpose: the target shape needs 3*4*5 = 60 elements but `a`
# has 24, so uncommenting the next line raises ValueError.
#a.reshape((3,4,5)) # ValueError

In [None]:
# Three equivalent ways to insert a length-1 axis into the (6, 4) array `a`.
# Each group prints the resulting shape for the new axis at position 0, 1 and 2.
a = np.arange(24).reshape((6,4))
print(a.shape)
print('----newaxis----')  # label fixed: it used to read 'nexaxis'
# np.newaxis inside an index inserts the new axis at that position.
print(a[np.newaxis,:, :].shape)
print(a[:,np.newaxis,:].shape)
print(a[:,:,np.newaxis].shape)

print('----reshape----')
# reshape needs the full target shape spelled out.
print(a.reshape((1,6,4)).shape)
print(a.reshape((6,1,4)).shape)
print(a.reshape((6,4,1)).shape)

print('----expand_dims----')
# expand_dims only needs the position of the new axis.
print(np.expand_dims(a,axis=0).shape)
print(np.expand_dims(a,axis=1).shape)
print(np.expand_dims(a,axis=2).shape)

### 2.3 Modify values
[Python Tutor](http://www.pythontutor.com/) is a useful tool for visualizing, step by step, how Python executes code.

#### 2.3.1 list

In [None]:
# Two equivalent ways to build the list [0, 1, 2, 3, 4].
l = list(range(5))
print('original: ')
print(l)
l = [x for x in range(5)]
print('original: ')
print(l)

print('exp1:')
m = l  # plain assignment: m is a second name for the same list object
print(m is l) #checks whether m and l refer to the same object
print(m == l) #checks whether values are the same
m[0] = -1  # the change is visible through l as well
print(l)

print('exp2:')
m = l[:]  # slicing a list builds a new list, so m is now a separate object
print(m is l)
print(m == l)
m[1] = -2  # only the copy changes; l printed below is unaffected
print(l)

print('exp3:')
l[2] = -3  # item assignment modifies the list in place
print(l)

#### 2.3.2 numpy array

In [None]:
# Start from the 1-D array [0 1 2 3 4].
a = np.arange(5)
print('original: ')
print(a)

In [None]:
# NOTE: relies on the array `a` defined in an earlier cell.
print('exp0:')
a[3] = -10  # item assignment modifies the array in place
print(a)

In [None]:
# NOTE: relies on the array `a` defined in an earlier cell.
print('exp1:')
b = a  # plain assignment: b is a second name for the same array object
print(a is b)
print(a == b)  # == on arrays compares element-wise and prints a boolean array
b[1] = -1  # the write through b is visible through a
print(a)

In [None]:
# NOTE: relies on the array `a` defined in an earlier cell.
print('exp2:')
b = a[:]  # basic slicing returns a view: a new array object that shares a's data
print(a is b)
print(a == b)
# print(id(a[0]), id(b[0]))
b[2] = -2  # unlike the list case, writing to the slice also changes a
print(a)
print(a == b)
print('-----------------')

In [None]:
# NOTE: relies on the array `a` defined in an earlier cell.
print('exp3:')
b = a[:]
b = np.array([5,4,3,2,1])  # rebinding the name b to a new array does not touch a
print(a)
print(a is b)
print(a == b)


In [None]:
# Skip this cell
# Optional 2-D version of the experiments above: which assignments write
# through to `a`, and which only rebind the name `b`.

a = np.arange(12).reshape((3,4))
print('original: ')
print(a)

print('exp1:')
a[1, 2] = -1  # direct item assignment changes a
print(a)

print('exp2:')
b = a[1, 2]  # a single element is read out as a scalar value
b = -2  # rebinding b has no effect on a
print(a)

print('exp3:')
b = a  # same object under a second name
b[1,2] = -3
print(a)

print('exp4:')
b = a[:]  # view of the whole array: writes go through to a
b[1,2] = -4
print(a)

print('exp5:')
b = a[2, :]  # view of row 2: writes go through to a
b[1] = -5
print(a)

print('exp6:')
b = a[2]  # same row selected with the shorter spelling
b[1] = -6
print(a)

### 2.4 Other indexing tricks
- Indexing with boolean array
- Indexing with integer list / array

In [None]:
a = np.arange(12).reshape((3,4))
print('original: ')
print(a)

print('\nexp1:')
idx = (a % 2 == 0)  # boolean array with the same shape as a
print(idx)
print(type(idx))
print(a[idx])  # the selected elements come back as a flat 1-D array

print('\nexp2:')
idx = (a[0] < 3)  # one boolean per column, computed from the first row
print(idx)
print(a[1:, idx])  # rows 1.. restricted to the columns where idx is True
print(a[1:, idx.tolist()])  # a plain list of booleans is accepted as well

print('\nexp3:')
idx = (a[0] < 3)
b = a[1:, idx]  #   creates a copy of the data because idx is a boolean array
b[0, 0] = -10  # modifies the copy only
print(b)
print(a)  # unchanged

In [None]:
# Indexing with integer lists / arrays (advanced indexing) on a 3x4 array.
a = np.arange(12).reshape((3,4))
print('original: ')
print(a)

print('\nexp1:')
# An integer row index combined with a list of column indices.
print(a[1, [2,3]])  # [a[1,2], a[1,3]]
print(a[1, np.array([2,3])])

print('\nexp2:')
# One index list per axis: the lists are paired up element by element.
print(a[[0, 1, 2], [1, 2, 3]])  # [a[0,1], a[1,2], a[2,3]])
print('***')
# The same selection written with an explicit tuple of index lists. The bare
# nested list a[[[0, 1, 2], [1, 2, 3]]] used to be read this way, but since
# NumPy 1.23 it is a single 2-D index on axis 0, where the value 3 is out of
# bounds for 3 rows and raises IndexError.
print(a[([0, 1, 2], [1, 2, 3])])
print('***')
# A single 2-D integer array indexes axis 0 only: every entry picks a whole row.
idx = np.array([[0, 1, 2], [1, 2, 0]])  # [[a[0], a[1], a[2]], [a[1], a[2], a[0]]]
print(a[idx])

## 3. Broadcasting
### 3.1 The basic idea
- Universal functions: functions that apply elementwise on arrays  
    Examples: np.add, np.power, np.greater, np.log, np.absolute  
- Universal functions that take two input arrays:  
    - Simplest case: two input arrays have same shape  
    - Two inputs with different shapes? Broadcasting!  
        Replicate values to make their shapes match  
        Can avoid making redundant copies
        
**A simple example:**

In [None]:
# Broadcasting a (3, 4) array against a scalar, a row vector and a column vector.
a = np.arange(12).reshape((3,4))  # shape (3, 4)
b = 1.1  # scalar
c = np.arange(4)  # shape (4,)
d = np.arange(3)  # shape (3,)

print("a =",a)
print('----------------')
print("b =",b)
print('----------------')
print("c =",c)
print('----------------')
print("d =",d)
print('----------------')
# The scalar is applied to every element.
print("a*b =\n",a * b)
print('----------------')
# c matches the last axis (length 4), so it is added to every row.
print("(a * b) + c =\n",(a * b) + c)
print('----------------')
# d has length 3, which does not match the last axis (length 4), so it is
# turned into a (3, 1) column first; the label now shows what is computed.
print("(a * b) + c + d[:, np.newaxis] =\n",(a * b) + c + d[:,np.newaxis])

In [None]:
b1 = np.arange(3).reshape((3,1))  # column vector, shape (3, 1)
b2 = np.arange(5).reshape((5,1))  # column vector, shape (5, 1)
print(b1)
print('----------')
print(b2)
print('----------')
# (3, 1) + (1, 5): both length-1 axes are stretched, giving a (3, 5) table of sums.
print(b1+b2.T)

In [None]:
# NOTE: relies on `b1` (3, 1) and `b2` (5, 1) defined in an earlier cell.
# The same sum done by hand: np.tile makes the replicated copies explicitly,
# which is the copying that broadcasting avoids.
b1_tile = np.tile(b1, (1,5))  # repeat the column 5 times side by side
print(b1_tile)
print('-----------')
b2_tile = np.tile(b2.T, (3,1))  # repeat the row 3 times, stacked vertically
print(b2_tile)
print('-----------')
print(b1_tile+b2_tile)

### 3.2 The broadcasting rule
**Example:**  
Shape of A:   2 x 4 x 1 x 3  
Shape of B:   5 x 1  
Shape of A+B: 2 x 4 x 5 x 3

- If one array has fewer dimensions, prepend 1's to its shape until both shapes have the same length
    - B: 5 x 1 --> 1 x 1 x 5 x 1
- Start from the last dimension and work forward
- If one array has length 1 for the current dimension, replicate the values in that dimension
    - A: 2 x 4 x 1 x 3 --> 2 x 4 x 5 x 3  
    - B: 1 x 1 x 5 x 1 --> 2 x 4 x 5 x 3
- If the two arrays have different lengths in a dimension and neither length is 1: report an error

In [None]:
A = np.arange(2*4*3).reshape((2,4,1,3))  # shape (2, 4, 1, 3)
B = np.arange(5).reshape((5,1)) * 0.1  # shape (5, 1)
# The printed C.shape shows the broadcast result shape of A + B.
C = A + B
print('A\n', A)
print('\nB\n', B)
print('\nC', C.shape)
print(C)

## 4. Advice
- Keep track of shapes of the variables:  
    - Write your expected shapes in the comments
    - Print out the actual shapes and check that they match
- Make up small examples and test your code

## 5. Practice Question
100 students are divided into 5 teams (team 0, 1, 2, 3, 4). There are 3 courses. Each student has a grade (between 0 and 1) for each course.  
The criterion for an "honor student" is: for every course, the student's grade must be higher than the average grade of all the students who are not on the same team as that student.  
For example, suppose the average grades of the students on teams 1, 2, 3 and 4 are 0.8, 0.85 and 0.9 for the three courses respectively. An "honor student" from team 0 then needs a grade higher than 0.8, 0.85 and 0.9 in the three courses respectively.

teams = np.random.choice(5, size=100)  
grades = np.random.rand(3, 100)

Find out the number of honor students in each team.

In [None]:
import numpy as np
teams = np.random.choice(5, size=100)  # one team id drawn from 0..4 per student
grades = np.random.rand(3, 100)  # one row per course, one column per student
print(teams.shape)
print(grades.shape)

# -------- count elements ------
# Three ways to count how many students each team has.
print('--------')
# 1) np.unique returns the distinct values together with their counts.
elements, counts = np.unique(teams, return_counts=True)
print(elements)
print(counts)
print('-----------')
# 2) np.bincount counts occurrences of each non-negative integer value.
print(np.bincount(teams))
print('-----------')
# 3) collections.Counter builds a value -> count mapping.
import collections
count = collections.Counter(teams)
print(count)


In [None]:
# Row t of the mask flags every student who is NOT a member of team t:
# a (5, 1) column of team ids is compared against the (100,) team assignment.
team_mask = np.arange(5)[:, np.newaxis] != teams  # 5x100
print(team_mask.shape, '-------', teams[:8], '-------', team_mask[:, :8], '-------', sep='\n')
# Matrix product with the mask adds up, per course and per team, the grades
# of the flagged students.
sum_grades = np.dot(grades, team_mask.T)  # 3x5
print(sum_grades)


In [None]:
# NOTE: relies on `team_mask` (5x100) and `sum_grades` (3x5) from an earlier cell.
# Summing a boolean row counts its True entries, i.e. the students flagged for that team.
count_students = np.sum(team_mask, axis=1)  # 5
print(count_students)

# Broadcasting (3, 5) / (5,): column t is divided by count_students[t].
ave_grades = sum_grades / count_students  # 3x5
print(ave_grades)

In [None]:
# NOTE: relies on `ave_grades` (3x5) and `teams` (100,) from earlier cells.
# Integer-array indexing: column j of the result is ave_grades[:, teams[j]],
# i.e. the thresholds that apply to student j.
require_grades = ave_grades[:, teams]  # 3x100
print(require_grades.shape)
print(require_grades[:, :4])

In [None]:
# NOTE: relies on `grades` and `require_grades` (both 3x100) from earlier cells.
# A student qualifies only if the comparison holds in every course (axis 0).
is_honor = np.all(grades > require_grades, axis=0)  # 100
print(is_honor.shape)
print(grades[:, :4])
print(is_honor[:4])

In [None]:
# Entry (t, j) is True only when student j is on team t AND is an honor student.
team_honor = (np.arange(5)[:, np.newaxis] == teams) & is_honor  # 5x100
print(team_honor.shape, team_honor[:, :4], sep='\n')

# Counting the True entries of each row gives the honor students per team.
honor_count = np.sum(team_honor, axis=1)  # 5
print(honor_count.shape, honor_count, sep='\n')

## 6. Miscellaneous

Disassembler for python bytecode

In [None]:
from dis import dis 
a = list(range(5))
print(a)
# dis() is given source strings here: each one is compiled and its bytecode
# printed, not executed, so forms a list would reject (such as a[1, :]) can
# still be inspected.
dis("a[1]")
print('-------')
dis("a[1, :]")
print('-------')
dis("a[1:2, :]")
print('-------')

# Three spellings of the full forward slice of this 5-item list.
dis("a[::1]")
print('-------')
dis("a[None:None:1]")
dis("a[0:5:1]")

print('-------')
# Three spellings of the reversed slice of this 5-item list.
dis("a[::-1]")
dis("a[None:None:-1]")
dis("a[-1:-6:-1]")
# print(a[-1:-6:-1])