In [1]:
import numpy as np
import tensorflow as tf
from sklearn.datasets import fetch_california_housing
from show import show_graph    # helper function to display the computation graph
In [2]:
# number of epochs
n_epochs = 1000
# learning rate
learning_rate = 0.01
In [3]:
# load the data
housing = fetch_california_housing()
m, n = housing.data.shape
In [4]:
m, n
Out[4]:
(20640, 8)
In [5]:
# Feature scaling (when using gradient descent, always remember to scale the data first)
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaled_housing_data = scaler.fit_transform(housing.data)
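As a quick sanity check (a minimal sketch, not part of the original notebook), each column of the scaled data should now have mean ≈ 0 and standard deviation ≈ 1:

# Sketch: StandardScaler output should be standardized per column.
print(scaled_housing_data.mean(axis=0).round(6))  # expect ~0 for every feature
print(scaled_housing_data.std(axis=0).round(6))   # expect ~1 for every feature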
In [6]:
# Create the feature and target nodes (same as in the Normal Equation version); a column of 1s is prepended so the bias term is folded into theta
scaled_housing_data_plus_bias = np.c_[np.ones((m, 1)), scaled_housing_data]
X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
In [7]:
# Create the theta coefficients, initialized with uniform random values in [-1, 1) (seed 42)
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name='theta')

$ \widehat{y} = X \cdot \theta $

In [8]:
# predicted y
y_pred = tf.matmul(X, theta, name='predictions')

Compute the mean squared error (the loss function): $$ \mathrm{MSE} = \frac{1}{m} \sum_{i=1}^{m} \mathrm{error}_i^2 $$

In [9]:
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name='mse')
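To double-check the op against plain NumPy (a small hedged sketch; it just evaluates the graph once in a throwaway session):

# Sketch: the mse node should equal np.mean(error**2) computed by hand.
with tf.Session() as s:
    s.run(tf.global_variables_initializer())
    tf_mse, tf_err = s.run([mse, error])
    print(np.isclose(tf_mse, np.mean(tf_err ** 2)))  # expect True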

Compute the gradients

Update rule for $\theta$:
$ \theta \leftarrow \theta - \alpha \nabla_\theta \mathrm{MSE}(\theta) $
1. Manual differentiation
$$ \frac{\partial\, \mathrm{MSE}}{\partial\, \mathrm{error}_i} = \frac{2}{m}\, \mathrm{error}_i \quad\Rightarrow\quad \nabla_\theta \mathrm{MSE}(\theta) = \frac{2}{m}\, X^T \cdot \mathrm{error} $$

In [10]:
# # Manual differentiation: write out the gradient formula by hand
# gradients = 2/m * tf.matmul(tf.transpose(X), error)
# training_op = tf.assign(theta, theta - learning_rate * gradients)
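A quick way to convince yourself the formula above is right (a hedged NumPy sketch on a tiny synthetic problem; all names here are illustrative) is to compare the analytic gradient with a central finite-difference estimate:

# Sketch: finite-difference check of grad = 2/m * X^T (X·theta - y).
rng = np.random.RandomState(42)
Xs, ys, th = rng.randn(50, 3), rng.randn(50, 1), rng.randn(3, 1)

def mse_np(t):
    e = Xs.dot(t) - ys
    return (e ** 2).mean()

analytic = 2.0 / 50 * Xs.T.dot(Xs.dot(th) - ys)
eps = 1e-6
numeric = np.zeros_like(th)
for i in range(3):
    d = np.zeros_like(th)
    d[i] = eps
    numeric[i] = (mse_np(th + d) - mse_np(th - d)) / (2 * eps)

print(np.allclose(analytic, numeric, atol=1e-5))  # expect True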

2. Using TensorFlow's automatic differentiation (autodiff)
TensorFlow uses reverse-mode autodiff, which is well suited to functions with many inputs and few outputs (such as neural networks), and offers high accuracy and broad applicability.

In [11]:
# # Automatic differentiation
# gradients = tf.gradients(mse, [theta])[0]
# training_op = tf.assign(theta, theta - learning_rate * gradients)
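To see reverse-mode autodiff in isolation (a minimal sketch, not from the original notebook), tf.gradients can differentiate a toy expression whose derivatives are easy to verify by hand:

# Sketch: for f(a, b) = a^2 * b + b, autodiff should give
# df/da = 2ab = 12 and df/db = a^2 + 1 = 10 at a=3, b=2.
a = tf.Variable(3.0, name='a')
b = tf.Variable(2.0, name='b')
f = a * a * b + b
with tf.Session() as s:
    s.run(tf.global_variables_initializer())
    print(s.run(tf.gradients(f, [a, b])))  # expect [12.0, 10.0]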

3. Using an optimizer

In [12]:
# Use the gradient descent optimizer
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
# # Other optimizers are available as well, e.g.:
# optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9)

# Specify the optimization objective: minimize mse
training_op = optimizer.minimize(mse)
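For reference (a sketch of the equivalent two-step form in the TF1 API), minimize(mse) is just compute_gradients() followed by apply_gradients(); splitting it up is useful when you want to inspect or clip the gradients in between:

# # Sketch: explicit two-step equivalent of optimizer.minimize(mse)
# grads_and_vars = optimizer.compute_gradients(mse)
# training_op = optimizer.apply_gradients(grads_and_vars)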
In [13]:
init = tf.global_variables_initializer()
In [14]:
with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        # log the training loss every 100 epochs
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        # run one gradient descent step
        sess.run(training_op)
        
    best_theta = theta.eval()
Epoch 0 MSE = 2.7544262
Epoch 100 MSE = 0.63222194
Epoch 200 MSE = 0.5727803
Epoch 300 MSE = 0.5585008
Epoch 400 MSE = 0.54907
Epoch 500 MSE = 0.54228795
Epoch 600 MSE = 0.5373791
Epoch 700 MSE = 0.53382194
Epoch 800 MSE = 0.5312425
Epoch 900 MSE = 0.5293705
In [15]:
best_theta
Out[15]:
array([[ 2.06855226e+00],
       [ 7.74078071e-01],
       [ 1.31192386e-01],
       [-1.17845066e-01],
       [ 1.64778143e-01],
       [ 7.44078017e-04],
       [-3.91945131e-02],
       [-8.61356676e-01],
       [-8.23479772e-01]], dtype=float32)
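As a final sanity check (a hedged sketch; the values will differ slightly because gradient descent only approximates the optimum), best_theta can be compared with the closed-form fit from sklearn:

# Sketch: compare gradient-descent theta with sklearn's exact solution.
from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression()
lin_reg.fit(scaled_housing_data, housing.target.reshape(-1, 1))
print(lin_reg.intercept_)  # should be close to best_theta[0]
print(lin_reg.coef_)       # should be close to best_theta[1:].ravel()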