《白话机器学习数学》 (Machine Learning Math in Plain Language): Summary and Directory

Knowledge summary for this chapter:

  • 1. Regression workflow
  • 2. Classification: the perceptron
  • 3. Classification: logistic regression
  • 4. Regularization

1. Implementation in Python

Each algorithm (regression, classification, and so on) differs in its details, but the basic workflow is the same for other machine learning algorithms as well: load the training data, standardize it, define a prediction function and an objective function, update the parameters by gradient descent until the error stops improving, then confirm the result with a plot and a few test predictions.

2. Regression

2.1 Linear regression (regression1_linear)

```python
import numpy as np
import matplotlib.pyplot as plt

# Load the training data
train = np.loadtxt('click.csv', delimiter=',', dtype='int', skiprows=1)
train_x = train[:,0]
train_y = train[:,1]

# Standardization (z-score)
mu = train_x.mean()
sigma = train_x.std()
def standardize(x):
    return (x - mu) / sigma

train_z = standardize(train_x)

# Initialize the parameters
theta0 = np.random.rand()
theta1 = np.random.rand()

# Prediction function
def f(x):
    return theta0 + theta1 * x

# Objective function
def E(x, y):
    return 0.5 * np.sum((y - f(x)) ** 2)

# Learning rate
ETA = 1e-3

# Change in error between iterations
diff = 1

# Number of updates
count = 0

# Repeat the parameter updates until the change in error drops below 0.01
error = E(train_z, train_y)
while diff > 1e-2:
    # Save the update results in temporary variables
    tmp_theta0 = theta0 - ETA * np.sum((f(train_z) - train_y))
    tmp_theta1 = theta1 - ETA * np.sum((f(train_z) - train_y) * train_z)

    # Update the parameters
    theta0 = tmp_theta0
    theta1 = tmp_theta1

    # Compute the difference from the previous error
    current_error = E(train_z, train_y)
    diff = error - current_error
    error = current_error

    # Log progress
    count += 1
    log = 'Iteration {}: theta0 = {:.3f}, theta1 = {:.3f}, diff = {:.4f}'
    print(log.format(count, theta0, theta1, diff))

# Plot to confirm the fit
x = np.linspace(-3, 3, 100)
plt.plot(train_z, train_y, 'o')
plt.plot(x, f(x))
plt.show()

# Verify with a few predictions
print(f(standardize(100)))
print(f(standardize(200)))
print(f(standardize(300)))
```

2.2 Polynomial regression (regression2_polynomial)

Depending on the shapes of its arguments, np.dot(x, y) computes the vector inner product, a matrix-matrix product, or a matrix-vector product.
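A quick standalone illustration of those three uses (not part of the book's code):

```python
import numpy as np

a = np.array([1, 2, 3])
b = np.array([4, 5, 6])
M = np.array([[1, 0, 1],
              [0, 1, 0]])

print(np.dot(a, b))    # vector inner product -> 32
print(np.dot(M, a))    # matrix-vector product -> [4 2]
print(np.dot(M, M.T))  # 2x2 matrix-matrix product
```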

```python
import numpy as np
import matplotlib.pyplot as plt

# Load the training data
train = np.loadtxt('click.csv', delimiter=',', dtype='int', skiprows=1)
train_x = train[:,0]
train_y = train[:,1]

# Standardization
mu = train_x.mean()
sigma = train_x.std()
def standardize(x):
    return (x - mu) / sigma

train_z = standardize(train_x)

# Initialize the parameters (one per feature: 1, x, x^2)
theta = np.random.rand(3)

# Build the design matrix from the training data
# (np.vstack stacks rows vertically; np.hstack concatenates horizontally)
def to_matrix(x):
    return np.vstack([np.ones(x.size), x, x ** 2]).T

X = to_matrix(train_z)

# Prediction function: matrix-vector product X . theta
def f(x):
    return np.dot(x, theta)

# Objective function
def E(x, y):
    return 0.5 * np.sum((y - f(x)) ** 2)

# Learning rate
ETA = 1e-3

# Change in error between iterations
diff = 1

# Number of updates
count = 0

# Repeat the parameter updates until the change in error drops below 0.01
error = E(X, train_y)
while diff > 1e-2:
    # Gradient step: theta -= ETA * X^T (f(X) - y)
    theta = theta - ETA * np.dot(f(X) - train_y, X)

    # Compute the difference from the previous error
    current_error = E(X, train_y)
    diff = error - current_error
    error = current_error

    # Log progress
    count += 1
    log = 'Iteration {}: theta = {}, diff = {:.4f}'
    print(log.format(count, theta, diff))

# Plot to confirm the fit
x = np.linspace(-3, 3, 100)  # 100 points from -3 to 3
plt.plot(train_z, train_y, 'o')
plt.plot(x, f(to_matrix(x)))
plt.show()
```

[Figure: actual values vs. the fitted curve]

[Figure: change in error over the iterations]

2.3 Stochastic gradient descent (regression3_sgd)

This version switches the objective to the mean squared error (MSE) and trains with stochastic gradient descent, updating the parameters one randomly chosen sample at a time.

```python
import numpy as np
import matplotlib.pyplot as plt

# Load the training data
train = np.loadtxt('click.csv', delimiter=',', dtype='int', skiprows=1)
train_x = train[:,0]
train_y = train[:,1]

# Standardization
mu = train_x.mean()
sigma = train_x.std()
def standardize(x):
    return (x - mu) / sigma

train_z = standardize(train_x)

# Initialize the parameters
theta = np.random.rand(3)

# Build the design matrix from the training data
def to_matrix(x):
    return np.vstack([np.ones(x.size), x, x ** 2]).T

X = to_matrix(train_z)

# Prediction function
def f(x):
    return np.dot(x, theta)

# Mean squared error
def MSE(x, y):
    return (1 / x.shape[0]) * np.sum((y - f(x)) ** 2)

# Learning rate
ETA = 1e-3

# Change in error between iterations
diff = 1

# Number of updates
count = 0

# Repeat the learning
error = MSE(X, train_y)
while diff > 1e-2:
    # Stochastic gradient descent:
    # np.random.permutation produces a random ordering of the row indices
    p = np.random.permutation(X.shape[0])

    # Visit the shuffled samples one at a time and update theta per sample
    for x, y in zip(X[p,:], train_y[p]):
        theta = theta - ETA * (f(x) - y) * x

    # Compute the difference from the previous error
    current_error = MSE(X, train_y)
    diff = error - current_error
    error = current_error

    # Log progress
    count += 1
    log = 'Iteration {}: theta = {}, diff = {:.4f}'
    print(log.format(count, theta, diff))

# Plot to confirm the fit
x = np.linspace(-3, 3, 100)
plt.plot(train_z, train_y, 'o')
plt.plot(x, f(to_matrix(x)))
plt.show()
```
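Between the full-batch updates of 2.2 and the per-sample updates above sits mini-batch gradient descent. A minimal sketch of one epoch, reusing X, train_y, f, and ETA from the code above (the batch size is an assumption, not from the original listing):

```python
batch_size = 10  # assumed value, for illustration only

# Shuffle the row indices, then walk through them in chunks
p = np.random.permutation(X.shape[0])
for i in range(0, X.shape[0], batch_size):
    idx = p[i:i + batch_size]
    x_batch, y_batch = X[idx], train_y[idx]
    # Same gradient formula as full batch, restricted to the chunk
    theta = theta - ETA * np.dot(f(x_batch) - y_batch, x_batch)
```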

The same pattern extends to multiple regression with several features, standardizing each column separately; the Iris dataset makes a convenient test case, as sketched below.
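A minimal sketch of that idea. It assumes scikit-learn is available purely as a way to load Iris; the book's own code does not depend on it:

```python
import numpy as np
from sklearn.datasets import load_iris

# Predict petal width from the other three Iris features
data = load_iris().data
train_x = data[:, 0:3]  # sepal length, sepal width, petal length
train_y = data[:, 3]    # petal width

# Standardize each feature column with its own mean and std
mu = train_x.mean(axis=0)
sigma = train_x.std(axis=0)
train_z = (train_x - mu) / sigma

# Design matrix with a bias column x0 = 1
X = np.hstack([np.ones((train_z.shape[0], 1)), train_z])
theta = np.random.rand(X.shape[1])

# Same batch gradient descent update as in the regression code
ETA = 1e-3
for _ in range(1000):
    theta = theta - ETA * np.dot(np.dot(X, theta) - train_y, X)

print(theta)
```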

3. Classification: the perceptron

3.1 classification1_perceptron

The training file images1.csv has the header x1,x2,y: two feature columns and a label column with y ∈ {1, -1}.
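The CSV itself is not included in this post; a hypothetical snippet to generate linearly separable data in the same format (every name and threshold here is illustrative):

```python
import numpy as np

# Points above the line x2 = x1 get label 1, the rest -1; this
# boundary passes through the origin, which the bias-free
# perceptron below can represent.
rng = np.random.default_rng(0)
x = rng.uniform(0, 500, size=(100, 2))
y = np.where(x[:, 1] > x[:, 0], 1, -1)

rows = np.column_stack([x, y])
np.savetxt('images1.csv', rows, delimiter=',',
           header='x1,x2,y', comments='', fmt='%d')
```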

```python
import numpy as np
import matplotlib.pyplot as plt

# Load the training data
train = np.loadtxt('images1.csv', delimiter=',', skiprows=1)
train_x = train[:,0:2]
train_y = train[:,2]

# Initialize the weights
w = np.random.rand(2)

# Discriminant function: which side of the line w . x = 0?
def f(x):
    if np.dot(w, x) >= 0:
        return 1
    else:
        return -1

# Number of epochs
epoch = 10

# Number of updates
count = 0

# Learn the weights
for _ in range(epoch):
    for x, y in zip(train_x, train_y):
        # Update only on misclassified samples
        if f(x) != y:
            w = w + y * x

            # Log progress
            count += 1
            print('Update {}: w = {}'.format(count, w))

# Plot to confirm: the decision boundary w . x = 0 is x2 = -(w[0]/w[1]) * x1
x1 = np.arange(0, 500)
plt.plot(train_x[train_y == 1, 0], train_x[train_y == 1, 1], 'o')
plt.plot(train_x[train_y == -1, 0], train_x[train_y == -1, 1], 'x')
plt.plot(x1, -w[0] / w[1] * x1, linestyle='dashed')
plt.show()
```

[Figure: perceptron decision boundary over the training points]

4. Classification: logistic regression

4.1 classification2_logistic_regression

```python
import numpy as np
import matplotlib.pyplot as plt

# Load the training data
train = np.loadtxt('images2.csv', delimiter=',', skiprows=1)
train_x = train[:,0:2]
train_y = train[:,2]

# Initialize the parameters
theta = np.random.rand(3)

# Standardize each column
mu = train_x.mean(axis=0)
sigma = train_x.std(axis=0)
def standardize(x):
    return (x - mu) / sigma

train_z = standardize(train_x)

# Add the bias column x0
def to_matrix(x):
    x0 = np.ones([x.shape[0], 1])
    return np.hstack([x0, x])

X = to_matrix(train_z)

# Sigmoid function
def f(x):
    return 1 / (1 + np.exp(-np.dot(x, theta)))

# Classification function
def classify(x):
    return (f(x) >= 0.5).astype(int)  # np.int is removed in recent NumPy

# Learning rate
ETA = 1e-3

# Number of epochs
epoch = 5000

# Number of updates
count = 0

# Repeat the learning
for _ in range(epoch):
    theta = theta - ETA * np.dot(f(X) - train_y, X)

    # Log progress
    count += 1
    print('Iteration {}: theta = {}'.format(count, theta))

# Plot to confirm: the decision boundary is theta . x = 0
x0 = np.linspace(-2, 2, 100)
plt.plot(train_z[train_y == 1, 0], train_z[train_y == 1, 1], 'o')
plt.plot(train_z[train_y == 0, 0], train_z[train_y == 0, 1], 'x')
plt.plot(x0, -(theta[0] + theta[1] * x0) / theta[2], linestyle='dashed')
plt.show()
```
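classify is defined above but never called; a quick use of it, checking the training accuracy (a small addition, not in the original listing):

```python
# Fraction of training samples the learned model labels correctly
accuracy = (classify(X) == train_y).mean()
print('training accuracy = {:.3f}'.format(accuracy))
```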

4.2 classification3_logistic_polynomial_regression

```python
import numpy as np
import matplotlib.pyplot as plt

# Load the training data
train = np.loadtxt('data3.csv', delimiter=',', skiprows=1)
train_x = train[:,0:2]
train_y = train[:,2]

# Initialize the parameters
theta = np.random.rand(4)

# Standardize each column
mu = train_x.mean(axis=0)
sigma = train_x.std(axis=0)
def standardize(x):
    return (x - mu) / sigma

train_z = standardize(train_x)

# Add the bias column x0 and the squared feature x3 = x1^2
def to_matrix(x):
    x0 = np.ones([x.shape[0], 1])
    x3 = x[:,0,np.newaxis] ** 2
    return np.hstack([x0, x, x3])

X = to_matrix(train_z)

# Sigmoid function
def f(x):
    return 1 / (1 + np.exp(-np.dot(x, theta)))

# Classification function
def classify(x):
    return (f(x) >= 0.5).astype(int)

# Learning rate
ETA = 1e-3

# Number of epochs
epoch = 5000

# Number of updates
count = 0

# Repeat the learning
for _ in range(epoch):
    theta = theta - ETA * np.dot(f(X) - train_y, X)

    # Log progress
    count += 1
    print('Iteration {}: theta = {}'.format(count, theta))

# Plot to confirm: solve theta . x = 0 for x2 to get the curved boundary
x1 = np.linspace(-2, 2, 100)
x2 = -(theta[0] + theta[1] * x1 + theta[3] * x1 ** 2) / theta[2]
plt.plot(train_z[train_y == 1, 0], train_z[train_y == 1, 1], 'o')
plt.plot(train_z[train_y == 0, 0], train_z[train_y == 0, 1], 'x')
plt.plot(x1, x2, linestyle='dashed')
plt.show()
```

[Figure: prediction accuracy plot]

4.3 classification4_logistic_regression_sgd

```python
import numpy as np
import matplotlib.pyplot as plt

# Load the training data
train = np.loadtxt('data3.csv', delimiter=',', skiprows=1)
train_x = train[:,0:2]
train_y = train[:,2]

# Initialize the parameters
theta = np.random.rand(4)

# Standardize each column
mu = train_x.mean(axis=0)
sigma = train_x.std(axis=0)
def standardize(x):
    return (x - mu) / sigma

train_z = standardize(train_x)

# Add the bias column x0 and the squared feature x3 = x1^2
def to_matrix(x):
    x0 = np.ones([x.shape[0], 1])
    x3 = x[:,0,np.newaxis] ** 2
    return np.hstack([x0, x, x3])

X = to_matrix(train_z)

# Sigmoid function
def f(x):
    return 1 / (1 + np.exp(-np.dot(x, theta)))

# Classification function
def classify(x):
    return (f(x) >= 0.5).astype(int)

# Learning rate
ETA = 1e-3

# Number of epochs
epoch = 5000

# Number of updates
count = 0

# Repeat the learning
for _ in range(epoch):
    # Update the parameters with stochastic gradient descent
    p = np.random.permutation(X.shape[0])
    for x, y in zip(X[p,:], train_y[p]):
        theta = theta - ETA * (f(x) - y) * x

    # Log progress
    count += 1
    print('Epoch {}: theta = {}'.format(count, theta))

# Plot to confirm the decision boundary
x1 = np.linspace(-2, 2, 100)
x2 = -(theta[0] + theta[1] * x1 + theta[3] * x1 ** 2) / theta[2]
plt.plot(train_z[train_y == 1, 0], train_z[train_y == 1, 1], 'o')
plt.plot(train_z[train_y == 0, 0], train_z[train_y == 0, 1], 'x')
plt.plot(x1, x2, linestyle='dashed')
plt.show()
```

[Figure: classification result]

5. Regularization

```python
import numpy as np
import matplotlib.pyplot as plt

# The true function
def g(x):
    return 0.1 * (x ** 3 + x ** 2 + x)

# Training data: the true function plus a little random noise
train_x = np.linspace(-2, 2, 8)
train_y = g(train_x) + np.random.randn(train_x.size) * 0.05

# Standardization
mu = train_x.mean()
sigma = train_x.std()
def standardize(x):
    return (x - mu) / sigma

train_z = standardize(train_x)

# Build the design matrix: polynomial features up to degree 10
def to_matrix(x):
    return np.vstack([
        np.ones(x.size),
        x,
        x ** 2,
        x ** 3,
        x ** 4,
        x ** 5,
        x ** 6,
        x ** 7,
        x ** 8,
        x ** 9,
        x ** 10,
    ]).T

X = to_matrix(train_z)

# Initialize the parameters
theta = np.random.randn(X.shape[1])

# Prediction function
def f(x):
    return np.dot(x, theta)

# Objective function
def E(x, y):
    return 0.5 * np.sum((y - f(x)) ** 2)

# Regularization constant
LAMBDA = 0.5

# Learning rate
ETA = 1e-4

# Change in error between iterations
diff = 1

# Repeat the learning (without regularization)
error = E(X, train_y)
while diff > 1e-6:
    theta = theta - ETA * (np.dot(f(X) - train_y, X))

    current_error = E(X, train_y)
    diff = error - current_error
    error = current_error

theta1 = theta

# Repeat the learning (with L2 regularization)
theta = np.random.randn(X.shape[1])
diff = 1
error = E(X, train_y)
while diff > 1e-6:
    # Regularization term; the bias theta[0] is not penalized
    reg_term = LAMBDA * np.hstack([0, theta[1:]])
    theta = theta - ETA * (np.dot(f(X) - train_y, X) + reg_term)

    current_error = E(X, train_y)
    diff = error - current_error
    error = current_error

theta2 = theta

# Plot to confirm
plt.plot(train_z, train_y, 'o')
z = standardize(np.linspace(-2, 2, 100))
theta = theta1  # without regularization
plt.plot(z, f(to_matrix(z)), linestyle='dashed')
theta = theta2  # with regularization
plt.plot(z, f(to_matrix(z)))
plt.show()
```

[Figure: plot from the book]

[Figure: actual output: unregularized fit (dashed) vs. regularized fit (solid)]
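A quick numeric check of the regularization effect, run after the code above (a sketch, not in the original): the L2 penalty should shrink the non-bias weights, so theta2 ends up with the smaller norm.

```python
# Compare the size of the learned weights, excluding the bias term
print('without regularization:', np.linalg.norm(theta1[1:]))
print('with regularization   :', np.linalg.norm(theta2[1:]))
```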

6. Closing remarks

Beyond the methods seen in this course there is much more to learn. Gradient descent has several variants (momentum, Adagrad, Adadelta, Adam, and so on), and besides linear regression, the perceptron, and logistic regression there are many other models to optimize, such as random forests, support vector machines, and naive Bayes. A small taste of the momentum variant follows.
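A minimal sketch of the momentum update on a toy quadratic objective (the objective, coefficients, and names are illustrative, not from the book):

```python
import numpy as np

# Toy objective E(theta) = 0.5 * ||theta||^2, whose gradient is theta
def gradient(theta):
    return theta

theta = np.array([3.0, -2.0])
v = np.zeros_like(theta)  # velocity: decaying running sum of gradients
ETA = 0.1                 # learning rate
MOMENTUM = 0.9            # momentum coefficient

for _ in range(100):
    v = MOMENTUM * v - ETA * gradient(theta)
    theta = theta + v

print(theta)  # approaches the minimum at [0, 0]
```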

Learning the theory is only one side of it; we should also apply these methods in practice, building small tools that streamline everyday work (for example, filtering customer-service conversations).