import tensorflow as tf
import numpy as np
from show import show_graph
from tensorflow.examples.tutorials.mnist import input_data
# GPU memory management
import os
os.environ["CUDA_VISIBLE_DEVICES"] = '0'  # make only the first GPU visible
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.5  # use at most 50% of GPU memory
config.gpu_options.allow_growth = True  # allocate GPU memory on demand
n_inputs = 28*28
n_hidden1 = 300
n_hidden2 = 100
n_outputs = 10
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int64, shape=(None), name="y")
def neuron_layer(X, n_neurons, name, activation=None):
    with tf.name_scope(name):
        # Get the input size from X (the number of image features for the first
        # layer, or the previous layer's output size for later layers)
        n_inputs = int(X.get_shape()[1])
        # Create the weight variable W, initialized from a truncated normal
        # distribution with standard deviation 2/sqrt(n_inputs)
        stddev = 2 / np.sqrt(n_inputs)
        init = tf.truncated_normal((n_inputs, n_neurons), stddev=stddev)
        W = tf.Variable(init, name="weights")
        # Create the bias variable b, initialized to zeros
        b = tf.Variable(tf.zeros([n_neurons]), name="biases")
        # Compute z = XW + b
        z = tf.matmul(X, W) + b
        # Apply a ReLU if an activation is requested, otherwise return z directly
        if activation == 'relu':
            return tf.nn.relu(z)
        else:
            return z
with tf.name_scope('dnn'):
    hidden1 = neuron_layer(X, n_hidden1, 'hidden1', activation='relu')
    hidden2 = neuron_layer(hidden1, n_hidden2, 'hidden2', activation='relu')
    logits = neuron_layer(hidden2, n_outputs, 'outputs')
You can also use the built-in fully connected layer fully_connected(), which applies a ReLU activation by default.
from tensorflow.contrib.layers import fully_connected
with tf.name_scope('dnn'):
    hidden1 = fully_connected(X, n_hidden1, scope='hidden1')
    hidden2 = fully_connected(hidden1, n_hidden2, scope='hidden2')
    logits = fully_connected(hidden2, n_outputs, scope='outputs', activation_fn=None)
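Note that fully_connected lives in tf.contrib, which was removed in TensorFlow 2.x; in TF 1.x the same layers can also be built with tf.layers.dense, which defaults to a linear activation, so the ReLU must be passed explicitly. A minimal sketch, assuming the same placeholders and layer sizes as above:
# Sketch: the same network with tf.layers.dense (activation defaults to linear)
with tf.name_scope('dnn'):
    hidden1 = tf.layers.dense(X, n_hidden1, activation=tf.nn.relu, name='hidden1')
    hidden2 = tf.layers.dense(hidden1, n_hidden2, activation=tf.nn.relu, name='hidden2')
    logits = tf.layers.dense(hidden2, n_outputs, name='outputs')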
tf.nn.sparse_softmax_cross_entropy_with_logits() takes sparse-encoded labels (class ids from 0 to n-1), applies softmax to the logits, and then computes the cross-entropy. Compared with running a separate softmax followed by a cross-entropy op, this function is more efficient, and it also takes care of corner cases such as logits equal to 0. A similar function is tf.nn.softmax_cross_entropy_with_logits(), which differs in that it expects one-hot encoded labels.
with tf.name_scope('loss'):
    # Compute the cross-entropy
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=y, logits=logits)
    # Use the mean cross-entropy as the loss function
    loss = tf.reduce_mean(xentropy, name='loss')
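For comparison, a minimal sketch of the one-hot variant; the names y_one_hot, xentropy_oh and loss_oh are illustrative:
# Sketch: one-hot variant of the same loss; y still holds sparse class ids.
y_one_hot = tf.one_hot(y, depth=n_outputs)
xentropy_oh = tf.nn.softmax_cross_entropy_with_logits(
    labels=y_one_hot, logits=logits)
loss_oh = tf.reduce_mean(xentropy_oh, name='loss_oh')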
Gradient descent optimizer, whose objective is to minimize the loss function.
learning_rate = 0.01
with tf.name_scope("train"):
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
training_op = optimizer.minimize(loss)
Compute the accuracy on the training set. tf.nn.in_top_k(predictions, targets, k, name=None) checks, for each example, whether the target appears among the k classes with the highest predicted probability; the corresponding entry of the returned boolean tensor is True if it does, and False otherwise.
with tf.name_scope("eval"):
# 判断最高概率的预测结果是否与标记相符
correct = tf.nn.in_top_k(logits, y, 1)
# tf.case把boolean转为float32(True为1.0,False为0.0)
# 求平均数得到评估结果也即准确率
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
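Passing a larger k turns the same op into a top-k accuracy; a minimal sketch for k = 3 (the value and the names are assumptions):
# Sketch: top-3 accuracy using the same logits and labels.
correct_top3 = tf.nn.in_top_k(logits, y, 3)
accuracy_top3 = tf.reduce_mean(tf.cast(correct_top3, tf.float32))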
Create the initializer and the saver.
init = tf.global_variables_initializer()
saver = tf.train.Saver()
mnist = input_data.read_data_sets("./datasets/mnist/")
n_epochs = 400
batch_size = 50
with tf.Session(config=config) as sess:
    # Initialize the variables
    init.run()
    # Loop for n_epochs epochs
    for epoch in range(n_epochs):
        # Train on mini-batches of batch_size examples each
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        # Evaluate and print the accuracy
        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_test = accuracy.eval(feed_dict={X: mnist.test.images,
                                            y: mnist.test.labels})
        print(epoch, "Train accuracy: ", acc_train, "Test accuracy: ", acc_test)
    # Save the model
    save_path = saver.save(sess, "./10.2.ckpt")
with tf.Session() as sess:
    saver.restore(sess, "./10.2.ckpt")
    X_new_scaled = [...]  # some new images (scaled from 0 to 1)
    Z = logits.eval(feed_dict={X: X_new_scaled})
    y_pred = np.argmax(Z, axis=1)
If you need the probability of each class, you can add a softmax on top of the logits.
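For example, a minimal sketch that adds a softmax op on the logits (the name y_proba is an assumption) and evaluates it after restoring the model:
# Sketch: class probabilities from the logits; y_proba is an illustrative name.
y_proba = tf.nn.softmax(logits, name="y_proba")
with tf.Session() as sess:
    saver.restore(sess, "./10.2.ckpt")
    proba = y_proba.eval(feed_dict={X: X_new_scaled})  # shape: (n_samples, n_outputs)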