Contents
Deep Learning Prerequisites: The Numpy Stack in Python
在 udemy 上的一個免費課程,稍微記一下練習的程式碼。
https://www.udemy.com/deep-learning-prerequisites-the-numpy-stack-in-python/learn/v4/overview
Install library
安裝相關函式庫的方法網路上蠻多的。
從 http://www.lfd.uci.edu/~gohlke/pythonlibs/ 下載到本機
numpy-mkl 和 scipy
pip install “本機地址”
cp36 為 python3.6
pip install "C:\Users\xxx\Desktop\numpy-1.12.0+mkl-cp36-cp36m-win_amd64.whl"
pip install "C:\Users\xxx\Desktop\mkl_service-1.1.2-cp36-cp36m-win_amd64.whl"
pip install "C:\Users\xxx\Desktop\scipy-0.18.1-cp36-cp36m-win_amd64.whl"
測試:
# In the Python interpreter: both imports must succeed and an array must build.
import numpy
import scipy
numpy.array([1,2,3])
# Back in the shell (not in Python), list the installed packages with: pip list
更新套件
pip install [package] --upgrade
pandas and matplotlib
pip install pandas
pip install matplotlib
Numpy
import numpy as np
Lists vs. Arrays
L = [1, 2, 3]
A = np.array([1, 2, 3])
L += L # list: in-place concatenation, same as L.extend(L) (not L.append(L))
A += A # array: element-wise addition, [1+1,2+2,3+3]
print(L) # [1, 2, 3, 1, 2, 3]
print(A) # [2 4 6]
np.sqrt(A) # element-wise square root of [2 4 6]
Dot product 1
# Dot product of two 1-D arrays, computed several equivalent ways.
a = np.array([1, 2])
b = np.array([3, 4])

# Explicit loop: multiply matching elements and accumulate the sum.
dot = 0
for aa, bb in zip(a, b):
    dot += aa * bb
print(dot) # 1*3+2*4 = 11

# `*` on arrays is element-wise, so this is a vector, not the dot product.
print(a * b) # [3 8]

# Vectorised forms; all are 11.
print(np.sum(a * b))
print((a * b).sum())
print(np.dot(a, b))
print(a.dot(b))
Vectors and Matrices
M = np.array([[1, 2], [3, 4]]) # 2-D ndarray
L = [[1, 2], [3, 4]] # plain nested list, for comparison
print(M[0]) # [1 2]
print(L[0]) # [1, 2]
M2 = np.matrix([[1, 2], [3, 4]])
A = np.array(M2) # convert the np.matrix into an ndarray
print(M2)
'''
[[1 2]
[3 4]]
'''
print(A.T) # transpose
'''
[[1 3]
[2 4]]
'''
Generating Matrices to work with
# The random values shown in the comments come from one run and differ every time.
z = np.zeros(10) # array([ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
z = np.zeros((5, 5)) # a tuple gives the shape: 5 x 5 (rebinds z)
'''
array([[ 0., 0., 0., 0., 0.],
[ 0., 0., 0., 0., 0.],
[ 0., 0., 0., 0., 0.],
[ 0., 0., 0., 0., 0.],
[ 0., 0., 0., 0., 0.]])
'''
o = np.ones(5) # array([ 1., 1., 1., 1., 1.])
r = np.random.random(3) # array([ 0.71596313, 0.17285695, 0.25807367])
g = np.random.randn(3) # array([ 0.5537719 , -0.58587229, 0.17593598])
g.mean() # 0.04794519548480905
g.var() # 0.22465563659358631
Matrix Products
numpy 中,矩陣乘法 $C(i,j)=\sum_k A(i,k)*B(k,j)$ 是用 C=A.dot(B) 來計算。
asterisk(*) 表示的是 element-wise multiplication,也就是 $C(i,j)=A(i,j)*B(i,j)$,所以兩個矩陣的形狀 (shape) 要相同;只有元素數量相同是不夠的,例如下面 (2,3) 和 (3,2) 相乘就會出錯。
a = np.array([[1, 0, 2], [-1, 3, 1]]) # shape (2, 3)
b = np.array([[3, 1], [2, 1], [1, 0]]) # shape (3, 2)
print(a.dot(b)) # (2, 3) dot (3, 2) -> (2, 2)
'''
[[5 1]
[4 2]]
'''
print(b.dot(a)) # (3, 2) dot (2, 3) -> (3, 3)
'''
[[2 3 7]
[1 3 5]
[1 0 2]]
'''
# Element-wise `*` needs compatible shapes; (2, 3) and (3, 2) are not, so
# NumPy raises ValueError. Catch it so the demonstration shows the error
# without aborting everything that follows.
try:
    print(a * b)
except ValueError as err:
    print("Error:", err)
More Matrix Operations
A = np.array([[1, 2], [3, 4]])
Ainv = np.linalg.inv(A) # matrix inverse
print(Ainv)
'''
[[-2. 1. ]
[ 1.5 -0.5]]
'''
print(Ainv.dot(A)) # identity, up to floating-point round-off (the e-16 entry)
'''
identity matrix
[[ 1.00000000e+00 4.44089210e-16]
[ 0.00000000e+00 1.00000000e+00]]
'''
print(A.dot(Ainv)) # same check with the factors in the other order
'''
[[ 1.00000000e+00 1.11022302e-16]
[ 0.00000000e+00 1.00000000e+00]]
'''
print(np.linalg.det(A)) # -2.0
print(np.diag(A)) # [1 4]
print(np.diag([1, 2])) # given a 1-D list, np.diag builds a diagonal matrix instead
'''
[[1 0]
[0 2]]
'''
print(np.diag(A).sum()) # 5
print(np.trace(A)) # 5
a = np.array([1, 2])
b = np.array([3, 4])
print(np.outer(a, b)) # outer product (not cross product); the Chinese names of the two are easy to confuse
'''
[[3 4]
[6 8]]
'''
x = np.random.randn(100, 3) # random data, shape (100, 3)
print(np.cov(x.T)) # x.T has shape (3, 100); NOTE(review): presumably yields a 3x3 covariance matrix, confirm against np.cov's rowvar default
Solving a Linear System
Problem: $Ax=b$
Solution: $A^{-1}Ax=x=A^{-1}b$
Matrix inverse,Matrix multiply(dot)
A = np.array([[1, 2], [3, 4]])
b = np.array([1, 2])
x = np.linalg.inv(A).dot(b) # x = A^{-1} b via an explicit inverse
print(x) # [ 2.22044605e-16 5.00000000e-01] => [0 0.5]
x = np.linalg.solve(A, b) # solves Ax = b directly
print(x) # [ 0. 0.5]
$$
\begin{cases}
x+y=2200 \\
1.5x+4y=5050
\end{cases}
$$
A = np.array([[1, 1], [1.5, 4]]) # coefficient rows of x + y and 1.5x + 4y
b = np.array([2200, 5050]) # right-hand sides
print(np.linalg.solve(A, b)) # [ 1500. 700.] => x = 1500, y = 700
Pandas
import pandas as pd
Manual data loading
'''data.csv
1.2,1.5,1.7
8,9.154,10.5
0.3,0.8,0.4
3,2,8
'''
import numpy as np

# Parse data.csv by hand: one list of floats per line, stacked into a 2-D array.
x = []
# `with` closes the file even if a line fails to parse.
with open("data.csv") as csv_file:
    for line in csv_file:
        if not line.strip():
            continue  # a blank (e.g. trailing) line would make float() raise
        row = line.split(',')
        sample = map(float, row)
        x.append(list(sample))
a = np.array(x)
print(a)
'''
[[ 1.2 1.5 1.7 ]
[ 8. 9.154 10.5 ]
[ 0.3 0.8 0.4 ]
[ 3. 2. 8. ]]
'''
print(a.shape) # (4, 3)
DataFrames
x = pd.read_csv("data.csv", header=None) # header=None: the printed frame below labels the columns 0, 1, 2
print(x)
'''
0 1 2
0 1.2 1.500 1.7
1 8.0 9.154 10.5
2 0.3 0.800 0.4
3 3.0 2.000 8.0
'''
print(type(x)) # <class 'pandas.core.frame.DataFrame'>
print(x.head(2)) # first 2 rows
'''
0 1 2
0 1.2 1.500 1.7
1 8.0 9.154 10.5
'''
More about DataFrames: Selecting Rows and Columns
Pandas : x[0] => column that has name 0
Numpy : x[0] => 0th row
data.csv:
1.2,1.5,1.7
8,9.154,10.5
0.3,0.8,0.4
3,2,8
in pandas:
x = pd.read_csv("data.csv", header=None)
print(x[0]) # [] with a single label selects the COLUMN named 0
'''
0 1.2
1 8.0
2 0.3
3 3.0
'''
# 0th row
print(x.iloc[0]) # by integer position
print(x.loc[0]) # by index label (replaces .ix, which was removed in pandas 1.0)
'''
0 1.2
1 1.5
2 1.7
'''
print(x[[0, 1]]) # columns 0 and 1
'''
0 1
0 1.2 1.500
1 8.0 9.154
2 0.3 0.800
3 3.0 2.000
'''
print(x[x[0] < 1.5]) # keep only the rows whose column 0 is below 1.5
'''
0 1 2
0 1.2 1.5 1.7
2 0.3 0.8 0.4
'''
print(x[0] < 1.5) # the boolean mask used in the filter above
'''
0 True
1 False
2 True
3 False
'''
in numpy:
# DataFrame.as_matrix() was removed in pandas 1.0; to_numpy() is the replacement
# (on pandas older than 0.24 use x.values instead).
M = x.to_numpy()
print(type(M)) # <class 'numpy.ndarray'>
print(M[0]) # [ 1.2 1.5 1.7]
Even more about DataFrames: Column Names
another_data.csv:
"(nooooo)Name","(ahhhhhh)age","color"
"Jason",21,"Red"
"Matt",10,"Green"
"Shell",8,"Blue"
no
no
no
df = pd.read_csv("another_data.csv", engine="python", skipfooter=3) # skipfooter=3: do not read the last 3 lines (the trailing "no" lines)
print(df.columns) # ['(nooooo)Name', '(ahhhhhh)age', 'color']
df.columns = ["Name", "age", "color"] # rename all columns at once
print(df.columns) # ['Name', 'age', 'color']
print(df["Name"])
print(df.Name) # attribute access to the same column as df["Name"]
'''
0 Jason
1 Matt
2 Shell
'''
df["new_column"] = "new" # assigning a scalar fills every row of the new column
print(df.head())
'''
Name age color new_column
0 Jason 21 Red new
1 Matt 10 Green new
2 Shell 8 Blue new
'''
The apply() function
這邊看不太懂它的例子,拿其他試了一下。
def getName(d):
    """Return the "Name" entry of row ``d`` with "123" appended."""
    return d["Name"] + "123"
print(df.apply(getName, axis=1)) # axis=1: apply function to each row
print(df.apply(lambda x: x["Name"] + "123", axis=1)) # same computation with an inline lambda
'''
0 Jason123
1 Matt123
2 Shell123
'''
Joins
將兩個資料合併。
t1 = pd.read_csv("table1.csv")
t2 = pd.read_csv("table2.csv")
m = pd.merge(t1, t2, on="user_id") # combine the two tables on their "user_id" column
t1.merge(t2, on="user_id") # method form of the same merge (result is not stored here)