In [1]:
import numpy as np
import tensorflow as tf
from sklearn.datasets import fetch_california_housing
from show import show_graph    # helper function to display the computation graph
In [2]:
# number of epochs
n_epochs = 1000
# learning rate
learning_rate = 0.01
In [3]:
# load the data
housing = fetch_california_housing()
m, n = housing.data.shape
In [4]:
m, n
Out[4]:
(20640, 8)
In [5]:
# Feature scaling (when using gradient descent, always remember to scale the data first)
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaled_housing_data = scaler.fit_transform(housing.data)
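As a quick sanity check (a minimal sketch, not part of the original notebook), each column of the scaled data should now have mean ≈ 0 and standard deviation ≈ 1:

# Sketch: StandardScaler output should be standardized per column.
print(scaled_housing_data.mean(axis=0).round(6))  # expect ~0 for every feature
print(scaled_housing_data.std(axis=0).round(6))   # expect ~1 for every feature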
In [6]:
# Create the feature and target nodes (same as in the Normal Equation version); a column of 1s is prepended so the bias term is folded into theta
scaled_housing_data_plus_bias = np.c_[np.ones((m, 1)), scaled_housing_data]
X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
In [7]:
# Create the theta coefficients, initialized with uniform random values in [-1, 1) (seed 42)
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name='theta')

$ \widehat{y} = X \cdot \theta $

In [8]:
# predicted y
y_pred = tf.matmul(X, theta, name='predictions')

Compute the mean squared error (the loss function): $$ \mathrm{MSE} = \frac{1}{m} \sum_{i=1}^{m} \mathrm{error}_i^2 $$

In [9]:
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name='mse')
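To double-check the op against plain NumPy (a small hedged sketch; it just evaluates the graph once in a throwaway session):

# Sketch: the mse node should equal np.mean(error**2) computed by hand.
with tf.Session() as s:
    s.run(tf.global_variables_initializer())
    tf_mse, tf_err = s.run([mse, error])
    print(np.isclose(tf_mse, np.mean(tf_err ** 2)))  # expect True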

Compute the gradients

Update rule for $\theta$:
$ \theta \leftarrow \theta - \alpha \nabla_\theta \mathrm{MSE}(\theta) $
1. Manual differentiation
$$ \frac{\partial\, \mathrm{MSE}}{\partial\, \mathrm{error}_i} = \frac{2}{m}\, \mathrm{error}_i \quad\Rightarrow\quad \nabla_\theta \mathrm{MSE}(\theta) = \frac{2}{m}\, X^T \cdot \mathrm{error} $$

In [10]:
# # Manual differentiation: write out the gradient formula by hand
# gradients = 2/m * tf.matmul(tf.transpose(X), error)
# training_op = tf.assign(theta, theta - learning_rate * gradients)
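A quick way to convince yourself the formula above is right (a hedged NumPy sketch on a tiny synthetic problem; all names here are illustrative) is to compare the analytic gradient with a central finite-difference estimate:

# Sketch: finite-difference check of grad = 2/m * X^T (X·theta - y).
rng = np.random.RandomState(42)
Xs, ys, th = rng.randn(50, 3), rng.randn(50, 1), rng.randn(3, 1)

def mse_np(t):
    e = Xs.dot(t) - ys
    return (e ** 2).mean()

analytic = 2.0 / 50 * Xs.T.dot(Xs.dot(th) - ys)
eps = 1e-6
numeric = np.zeros_like(th)
for i in range(3):
    d = np.zeros_like(th)
    d[i] = eps
    numeric[i] = (mse_np(th + d) - mse_np(th - d)) / (2 * eps)

print(np.allclose(analytic, numeric, atol=1e-5))  # expect True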

2. Using TensorFlow's automatic differentiation (autodiff)
TensorFlow uses reverse-mode autodiff, which is well suited to functions with many inputs and few outputs (such as neural networks), and offers high accuracy and broad applicability.

In [11]:
# # Automatic differentiation
# gradients = tf.gradients(mse, [theta])[0]
# training_op = tf.assign(theta, theta - learning_rate * gradients)
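To see reverse-mode autodiff in isolation (a minimal sketch, not from the original notebook), tf.gradients can differentiate a toy expression whose derivatives are easy to verify by hand:

# Sketch: for f(a, b) = a^2 * b + b, autodiff should give
# df/da = 2ab = 12 and df/db = a^2 + 1 = 10 at a=3, b=2.
a = tf.Variable(3.0, name='a')
b = tf.Variable(2.0, name='b')
f = a * a * b + b
with tf.Session() as s:
    s.run(tf.global_variables_initializer())
    print(s.run(tf.gradients(f, [a, b])))  # expect [12.0, 10.0]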

3. Using an optimizer

In [12]:
# Use the gradient descent optimizer
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
# # Other optimizers are available as well, e.g.:
# optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9)

# Specify the optimization objective: minimize mse
training_op = optimizer.minimize(mse)
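For reference (a sketch of the equivalent two-step form in the TF1 API), minimize(mse) is just compute_gradients() followed by apply_gradients(); splitting it up is useful when you want to inspect or clip the gradients in between:

# # Sketch: explicit two-step equivalent of optimizer.minimize(mse)
# grads_and_vars = optimizer.compute_gradients(mse)
# training_op = optimizer.apply_gradients(grads_and_vars)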
In [13]:
init = tf.global_variables_initializer()
In [14]:
with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        # log the training loss every 100 epochs
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        # run one gradient descent step
        sess.run(training_op)
        
    best_theta = theta.eval()
Epoch 0 MSE = 2.7544262
Epoch 100 MSE = 0.63222194
Epoch 200 MSE = 0.5727803
Epoch 300 MSE = 0.5585008
Epoch 400 MSE = 0.54907
Epoch 500 MSE = 0.54228795
Epoch 600 MSE = 0.5373791
Epoch 700 MSE = 0.53382194
Epoch 800 MSE = 0.5312425
Epoch 900 MSE = 0.5293705
In [15]:
best_theta
Out[15]:
array([[ 2.06855226e+00],
       [ 7.74078071e-01],
       [ 1.31192386e-01],
       [-1.17845066e-01],
       [ 1.64778143e-01],
       [ 7.44078017e-04],
       [-3.91945131e-02],
       [-8.61356676e-01],
       [-8.23479772e-01]], dtype=float32)
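As a final sanity check (a hedged sketch; the values will differ slightly because gradient descent only approximates the optimum), best_theta can be compared with the closed-form fit from sklearn:

# Sketch: compare gradient-descent theta with sklearn's exact solution.
from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression()
lin_reg.fit(scaled_housing_data, housing.target.reshape(-1, 1))
print(lin_reg.intercept_)  # should be close to best_theta[0]
print(lin_reg.coef_)       # should be close to best_theta[1:].ravel()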