Chapter summary:
1. Regression workflow
2. Classification: perceptron
3. Classification: logistic regression
4. Regularization
1. Implementation in Python

Although the specifics differ from algorithm to algorithm (classification versus regression, for example), the same basic workflow applies to other machine learning algorithms as well: load the data, standardize it, define a model and an objective, then update the parameters iteratively until the objective stops improving.
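To make that concrete, here is a minimal sketch of the shared training loop. It is a generic outline, not code from the course; `gradient` and `objective` are hypothetical placeholders for whatever the concrete algorithm supplies.

```python
import numpy as np

# Generic gradient-descent skeleton. The `gradient` and `objective`
# arguments are hypothetical placeholders: each algorithm (linear
# regression, logistic regression, ...) plugs in its own versions.
def train(X, y, gradient, objective, eta=1e-3, tol=1e-2):
    theta = np.random.rand(X.shape[1])   # random initialization
    error = objective(X, y, theta)
    diff = tol + 1
    while diff > tol:                    # stop when improvement stalls
        theta = theta - eta * gradient(X, y, theta)
        current = objective(X, y, theta)
        diff, error = error - current, current
    return theta
```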
2. Regression

2.1 Linear regression (regression1_linear)

```python
import numpy as np
import matplotlib.pyplot as plt

# Load the training data (click.csv has a header row and integer x,y columns)
train = np.loadtxt('click.csv', delimiter=',', dtype='int', skiprows=1)
train_x = train[:, 0]
train_y = train[:, 1]

# Standardize the inputs (z-score)
mu = train_x.mean()
sigma = train_x.std()

def standardize(x):
    return (x - mu) / sigma

train_z = standardize(train_x)

# Randomly initialize the parameters
theta0 = np.random.rand()
theta1 = np.random.rand()

# Prediction function: a straight line
def f(x):
    return theta0 + theta1 * x

# Objective function: half the sum of squared errors
def E(x, y):
    return 0.5 * np.sum((y - f(x)) ** 2)

# Learning rate
ETA = 1e-3

diff = 1
count = 0
error = E(train_z, train_y)
while diff > 1e-2:
    # Update both parameters simultaneously via temporaries
    tmp_theta0 = theta0 - ETA * np.sum(f(train_z) - train_y)
    tmp_theta1 = theta1 - ETA * np.sum((f(train_z) - train_y) * train_z)
    theta0 = tmp_theta0
    theta1 = tmp_theta1
    current_error = E(train_z, train_y)
    diff = error - current_error
    error = current_error
    count += 1
    log = 'Iteration {}: theta0 = {:.3f}, theta1 = {:.3f}, diff = {:.4f}'
    print(log.format(count, theta0, theta1, diff))

# Plot the data and the fitted line
x = np.linspace(-3, 3, 100)
plt.plot(train_z, train_y, 'o')
plt.plot(x, f(x))
plt.show()

# Predict for new inputs (standardized first)
print(f(standardize(100)))
print(f(standardize(200)))
print(f(standardize(300)))
```
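As a sanity check (my own addition, not part of the course code), the same line can be obtained in closed form with NumPy's polynomial least-squares fit; gradient descent should converge toward these values.

```python
# Closed-form least-squares fit on the standardized data, for comparison.
# np.polyfit returns coefficients from the highest degree down.
slope, intercept = np.polyfit(train_z, train_y, 1)
print('closed form: theta0 = {:.3f}, theta1 = {:.3f}'.format(intercept, slope))
```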
2.2 Polynomial regression (regression2_polynomial)
np.dot(x, y) can compute a vector inner product, a matrix-matrix product, or a matrix-vector product, depending on the shapes of its arguments.
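A quick demonstration of all three cases, with made-up illustrative values:

```python
import numpy as np

a = np.array([1, 2, 3])
b = np.array([4, 5, 6])
print(np.dot(a, b))    # inner product of two vectors: 32

M = np.array([[1, 2], [3, 4]])
N = np.array([[5, 6], [7, 8]])
print(np.dot(M, N))    # matrix-matrix product: [[19 22], [43 50]]

v = np.array([1, 1])
print(np.dot(M, v))    # matrix-vector product: [3 7]
```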
```python
import numpy as np
import matplotlib.pyplot as plt

train = np.loadtxt('click.csv', delimiter=',', dtype='int', skiprows=1)
train_x = train[:, 0]
train_y = train[:, 1]

# Standardize the inputs
mu = train_x.mean()
sigma = train_x.std()

def standardize(x):
    return (x - mu) / sigma

train_z = standardize(train_x)

# Three parameters now: bias, linear and quadratic terms
theta = np.random.rand(3)

# Build the design matrix [1, x, x^2]
def to_matrix(x):
    return np.vstack([np.ones(x.size), x, x ** 2]).T

X = to_matrix(train_z)

# Prediction function
def f(x):
    return np.dot(x, theta)

# Objective function
def E(x, y):
    return 0.5 * np.sum((y - f(x)) ** 2)

ETA = 1e-3

diff = 1
count = 0
error = E(X, train_y)
while diff > 1e-2:
    # Vectorized update of all parameters at once
    theta = theta - ETA * np.dot(f(X) - train_y, X)
    current_error = E(X, train_y)
    diff = error - current_error
    error = current_error
    count += 1
    log = 'Iteration {}: theta = {}, diff = {:.4f}'
    print(log.format(count, theta, diff))

# Plot the data and the fitted curve
x = np.linspace(-3, 3, 100)
plt.plot(train_z, train_y, 'o')
plt.plot(x, f(to_matrix(x)))
plt.show()
```
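For reference, this objective also has a closed-form minimizer, the normal equation theta = (X^T X)^(-1) X^T y. The check below is my own addition and assumes X^T X is invertible:

```python
# Solve the normal equation X^T X theta = X^T y directly; the
# gradient-descent result should converge toward this solution.
theta_exact = np.linalg.solve(X.T.dot(X), X.T.dot(train_y))
print('closed-form theta =', theta_exact)
```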
2.3 Stochastic gradient descent (regression3_sgd)
This version adds the mean squared error (MSE) as the convergence metric and switches to stochastic gradient descent, updating the parameters one randomly chosen sample at a time.
```python
import numpy as np
import matplotlib.pyplot as plt

train = np.loadtxt('click.csv', delimiter=',', dtype='int', skiprows=1)
train_x = train[:, 0]
train_y = train[:, 1]

mu = train_x.mean()
sigma = train_x.std()

def standardize(x):
    return (x - mu) / sigma

train_z = standardize(train_x)

theta = np.random.rand(3)

def to_matrix(x):
    return np.vstack([np.ones(x.size), x, x ** 2]).T

X = to_matrix(train_z)

def f(x):
    return np.dot(x, theta)

# Mean squared error, used here to monitor convergence
def MSE(x, y):
    return (1 / x.shape[0]) * np.sum((y - f(x)) ** 2)

ETA = 1e-3

diff = 1
count = 0
error = MSE(X, train_y)
while diff > 1e-2:
    # Visit the samples in a fresh random order on each pass
    p = np.random.permutation(X.shape[0])
    # Update the parameters one sample at a time
    for x, y in zip(X[p, :], train_y[p]):
        theta = theta - ETA * (f(x) - y) * x
    current_error = MSE(X, train_y)
    diff = error - current_error
    error = current_error
    count += 1
    log = 'Iteration {}: theta = {}, diff = {:.4f}'
    print(log.format(count, theta, diff))

x = np.linspace(-3, 3, 100)
plt.plot(train_z, train_y, 'o')
plt.plot(x, f(to_matrix(x)))
plt.show()
```
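The shuffling works because np.random.permutation returns one random index order that is applied to X and train_y together, so each sample stays paired with its label. A toy illustration with made-up values:

```python
import numpy as np

X_toy = np.array([[1, 10], [2, 20], [3, 30]])
y_toy = np.array([100, 200, 300])

p = np.random.permutation(X_toy.shape[0])  # e.g. array([2, 0, 1])
# The same index array reorders rows and labels consistently
print(X_toy[p, :])
print(y_toy[p])
```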
3. Classification: perceptron

3.1 Perceptron (classification1_perceptron)
The data file images1.csv has three columns: x1, x2, y.
```python
import numpy as np
import matplotlib.pyplot as plt

train = np.loadtxt('images1.csv', delimiter=',', skiprows=1)
train_x = train[:, 0:2]
train_y = train[:, 2]

# Randomly initialize the weight vector
w = np.random.rand(2)

# Discriminant function: the sign of the inner product w . x
def f(x):
    if np.dot(w, x) >= 0:
        return 1
    else:
        return -1

# Number of passes over the training data
epoch = 10

count = 0
for _ in range(epoch):
    for x, y in zip(train_x, train_y):
        # Update the weights only when a sample is misclassified
        if f(x) != y:
            w = w + y * x
            count += 1
            print('Update {}: w = {}'.format(count, w))

# Plot the data and the learned decision boundary (the line w . x = 0)
x1 = np.arange(0, 500)
plt.plot(train_x[train_y == 1, 0], train_x[train_y == 1, 1], 'o')
plt.plot(train_x[train_y == -1, 0], train_x[train_y == -1, 1], 'x')
plt.plot(x1, -w[0] / w[1] * x1, linestyle='dashed')
plt.show()
```
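With the trained w, a new point can be classified by calling f directly; the coordinates below are a made-up example input, not from the course data.

```python
# Classify a hypothetical new point (x1 = 200, x2 = 100)
print(f(np.array([200, 100])))  # prints 1 or -1
```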
4. Classification: logistic regression

4.1 Logistic regression (classification2_logistic_regression)

```python
import numpy as np
import matplotlib.pyplot as plt

train = np.loadtxt('images2.csv', delimiter=',', skiprows=1)
train_x = train[:, 0:2]
train_y = train[:, 2]

# Three parameters: bias plus one weight per feature
theta = np.random.rand(3)

# Standardize each feature column
mu = train_x.mean(axis=0)
sigma = train_x.std(axis=0)

def standardize(x):
    return (x - mu) / sigma

train_z = standardize(train_x)

# Prepend a column of ones for the bias term
def to_matrix(x):
    x0 = np.ones([x.shape[0], 1])
    return np.hstack([x0, x])

X = to_matrix(train_z)

# Sigmoid function: the probability that y = 1
def f(x):
    return 1 / (1 + np.exp(-np.dot(x, theta)))

# Threshold the probability at 0.5 to get a class label
def classify(x):
    return (f(x) >= 0.5).astype(int)

ETA = 1e-3
epoch = 5000

count = 0
for _ in range(epoch):
    theta = theta - ETA * np.dot(f(X) - train_y, X)
    count += 1
    print('Iteration {}: theta = {}'.format(count, theta))

# Plot the data and the decision boundary f(x) = 0.5
x0 = np.linspace(-2, 2, 100)
plt.plot(train_z[train_y == 1, 0], train_z[train_y == 1, 1], 'o')
plt.plot(train_z[train_y == 0, 0], train_z[train_y == 0, 1], 'x')
plt.plot(x0, -(theta[0] + theta[1] * x0) / theta[2], linestyle='dashed')
plt.show()
```
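Once trained, classify can label new points. The inputs below are made-up examples; they must go through the same standardize/to_matrix pipeline as the training data.

```python
# Classify two hypothetical new points (raw, unstandardized coordinates)
new_points = np.array([[200, 100], [100, 200]])
print(classify(to_matrix(standardize(new_points))))  # e.g. [1 0]
```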
4.2 Logistic regression with a polynomial feature (classification3_logistic_polynomial_regression)

```python
import numpy as np
import matplotlib.pyplot as plt

train = np.loadtxt('data3.csv', delimiter=',', skiprows=1)
train_x = train[:, 0:2]
train_y = train[:, 2]

# Four parameters: bias, two linear terms and one quadratic term
theta = np.random.rand(4)

mu = train_x.mean(axis=0)
sigma = train_x.std(axis=0)

def standardize(x):
    return (x - mu) / sigma

train_z = standardize(train_x)

# Add a bias column and a quadratic feature x1^2
def to_matrix(x):
    x0 = np.ones([x.shape[0], 1])
    x3 = x[:, 0, np.newaxis] ** 2
    return np.hstack([x0, x, x3])

X = to_matrix(train_z)

def f(x):
    return 1 / (1 + np.exp(-np.dot(x, theta)))

def classify(x):
    return (f(x) >= 0.5).astype(int)

ETA = 1e-3
epoch = 5000

count = 0
for _ in range(epoch):
    theta = theta - ETA * np.dot(f(X) - train_y, X)
    count += 1
    print('Iteration {}: theta = {}'.format(count, theta))

# The decision boundary is now a parabola in the (x1, x2) plane
x1 = np.linspace(-2, 2, 100)
x2 = -(theta[0] + theta[1] * x1 + theta[3] * x1 ** 2) / theta[2]
plt.plot(train_z[train_y == 1, 0], train_z[train_y == 1, 1], 'o')
plt.plot(train_z[train_y == 0, 0], train_z[train_y == 0, 1], 'x')
plt.plot(x1, x2, linestyle='dashed')
plt.show()
```
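A quick way to gauge the fit (my own addition, not course code) is to compare the predicted labels against the training labels:

```python
# Fraction of training samples classified correctly
accuracy = np.mean(classify(X) == train_y)
print('training accuracy = {:.3f}'.format(accuracy))
```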
4.3 Logistic regression with stochastic gradient descent (classification4_logistic_regression_sgd)

```python
import numpy as np
import matplotlib.pyplot as plt

train = np.loadtxt('data3.csv', delimiter=',', skiprows=1)
train_x = train[:, 0:2]
train_y = train[:, 2]

theta = np.random.rand(4)

mu = train_x.mean(axis=0)
sigma = train_x.std(axis=0)

def standardize(x):
    return (x - mu) / sigma

train_z = standardize(train_x)

def to_matrix(x):
    x0 = np.ones([x.shape[0], 1])
    x3 = x[:, 0, np.newaxis] ** 2
    return np.hstack([x0, x, x3])

X = to_matrix(train_z)

def f(x):
    return 1 / (1 + np.exp(-np.dot(x, theta)))

def classify(x):
    return (f(x) >= 0.5).astype(int)

ETA = 1e-3
epoch = 5000

count = 0
for _ in range(epoch):
    # Shuffle the samples, then update theta one sample at a time
    p = np.random.permutation(X.shape[0])
    for x, y in zip(X[p, :], train_y[p]):
        theta = theta - ETA * (f(x) - y) * x
    count += 1
    print('Iteration {}: theta = {}'.format(count, theta))

x1 = np.linspace(-2, 2, 100)
x2 = -(theta[0] + theta[1] * x1 + theta[3] * x1 ** 2) / theta[2]
plt.plot(train_z[train_y == 1, 0], train_z[train_y == 1, 1], 'o')
plt.plot(train_z[train_y == 0, 0], train_z[train_y == 0, 1], 'x')
plt.plot(x1, x2, linestyle='dashed')
plt.show()
```
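To watch convergence, one option (a sketch of my own, not from the course) is to record the training accuracy after every epoch and plot it. This re-initializes theta, so run it instead of, not after, the training loop above.

```python
# Track training accuracy over the SGD epochs
theta = np.random.rand(4)
accuracies = []
for _ in range(epoch):
    p = np.random.permutation(X.shape[0])
    for x, y in zip(X[p, :], train_y[p]):
        theta = theta - ETA * (f(x) - y) * x
    accuracies.append(np.mean(classify(X) == train_y))

plt.plot(range(1, epoch + 1), accuracies)
plt.xlabel('epoch')
plt.ylabel('training accuracy')
plt.show()
```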
5. Regularization (regularization)

```python
import numpy as np
import matplotlib.pyplot as plt

# The "true" function the noisy training data is drawn from
def g(x):
    return 0.1 * (x ** 3 + x ** 2 + x)

# A small noisy training set
train_x = np.linspace(-2, 2, 8)
train_y = g(train_x) + np.random.randn(train_x.size) * 0.05

mu = train_x.mean()
sigma = train_x.std()

def standardize(x):
    return (x - mu) / sigma

train_z = standardize(train_x)

# A deliberately over-flexible degree-10 polynomial model
def to_matrix(x):
    return np.vstack([
        np.ones(x.size),
        x,
        x ** 2,
        x ** 3,
        x ** 4,
        x ** 5,
        x ** 6,
        x ** 7,
        x ** 8,
        x ** 9,
        x ** 10,
    ]).T

X = to_matrix(train_z)

theta = np.random.randn(X.shape[1])

def f(x):
    return np.dot(x, theta)

def E(x, y):
    return 0.5 * np.sum((y - f(x)) ** 2)

# Regularization strength
LAMBDA = 0.5

ETA = 1e-4

# First pass: plain gradient descent, no regularization
diff = 1
error = E(X, train_y)
while diff > 1e-6:
    theta = theta - ETA * np.dot(f(X) - train_y, X)
    current_error = E(X, train_y)
    diff = error - current_error
    error = current_error
theta1 = theta

# Second pass: add an L2 penalty (the bias term is not regularized)
theta = np.random.randn(X.shape[1])
diff = 1
error = E(X, train_y)
while diff > 1e-6:
    reg_term = LAMBDA * np.hstack([0, theta[1:]])
    theta = theta - ETA * (np.dot(f(X) - train_y, X) + reg_term)
    current_error = E(X, train_y)
    diff = error - current_error
    error = current_error
theta2 = theta

# Plot both fits: dashed = unregularized, solid = regularized
plt.plot(train_z, train_y, 'o')
z = standardize(np.linspace(-2, 2, 100))
theta = theta1
plt.plot(z, f(to_matrix(z)), linestyle='dashed')
theta = theta2
plt.plot(z, f(to_matrix(z)))
plt.show()
```
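The shrinking effect can also be read off the parameters themselves (my own check, not course code): the regularized theta should have a noticeably smaller norm.

```python
# Compare parameter magnitudes with and without regularization
print('||theta1|| (no regularization)   =', np.linalg.norm(theta1))
print('||theta2|| (with regularization) =', np.linalg.norm(theta2))
```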
6. Closing remarks

Beyond the methods covered in the course there is much more to explore. Gradient descent itself has several variants (momentum, Adagrad, Adadelta, Adam, and so on), and besides linear regression, the perceptron and logistic regression there are many other models to optimize, such as random forests, support vector machines and naive Bayes.
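As a taste of those variants, here is a minimal sketch of the momentum update, assuming the same design matrix X, prediction f, labels train_y and learning rate ETA as in the regularization example above; the coefficient 0.9 is a conventional choice, not a value from the course.

```python
# Gradient descent with momentum: accumulate a velocity vector so that
# past gradients carry the update forward and damp oscillation.
theta = np.random.randn(X.shape[1])
v = np.zeros_like(theta)
MOMENTUM = 0.9  # conventional value, chosen here for illustration

for _ in range(1000):
    grad = np.dot(f(X) - train_y, X)
    v = MOMENTUM * v - ETA * grad
    theta = theta + v
```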
Learning the theory is only one side of it; we should also be able to apply it in practice, for example by building small tools that streamline everyday work (such as filtering customer-service conversations).