import tensorflow as tf
import numpy as np
from show import show_graph

# GPU memory management
import os

# Make only the first GPU visible to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = '0'

# Session configuration: use at most 50% of the GPU memory, and request
# memory on demand instead of grabbing it all up front.
config = tf.ConfigProto(
    gpu_options=tf.GPUOptions(
        per_process_gpu_memory_fraction=0.5,
        allow_growth=True,
    )
)

构造计算图

# Layer sizes. They must agree with the sample data used in this file:
# every row fed to X0/X1 has 3 features, and the printed outputs have
# 5 columns (one per neuron).
n_inputs = 3
n_neurons = 5

# Placeholders: X0 receives the input at t = 0, X1 receives the input at t = 1.
X0 = tf.placeholder(tf.float32, [None, n_inputs])
X1 = tf.placeholder(tf.float32, [None, n_inputs])

# Weights: Wx connects the inputs to the neurons, Wy connects the previous
# step's outputs to the current step's neurons, and b is the bias.
Wx = tf.Variable(tf.random_normal(shape=[n_inputs, n_neurons], dtype=tf.float32))
Wy = tf.Variable(tf.random_normal(shape=[n_neurons, n_neurons], dtype=tf.float32))
b = tf.Variable(tf.zeros([1, n_neurons], dtype=tf.float32))

# Outputs: Y0 depends only on X0, while Y1 combines the previous output Y0
# with the new input X1. Both steps share the same Wx and b.
Y0 = tf.tanh(tf.matmul(X0, Wx) + b)
Y1 = tf.tanh(tf.matmul(Y0, Wy) + tf.matmul(X1, Wx) + b)

# Global variable initializer
init = tf.global_variables_initializer()

训练

# Four simulated input instances for each of the two time steps
X0_batch = np.array([
    [0, 1, 2],
    [3, 4, 5],
    [6, 7, 8],
    [9, 0, 1],
])  # t = 0
X1_batch = np.array([
    [9, 8, 7],
    [0, 0, 0],
    [6, 5, 4],
    [3, 2, 1],
])  # t = 1

with tf.Session(config=config) as sess:
    # Initialize Wx, Wy and b before evaluating the outputs.
    sess.run(init)
    # Evaluate both time steps' outputs in a single run.
    Y0_val, Y1_val = sess.run(
        [Y0, Y1],
        feed_dict={X0: X0_batch, X1: X1_batch},
    )

# Outputs at t = 0: one row per input instance, one column per neuron.
print(Y0_val)

[[-0.88967645 -0.86115813 -0.99212593  0.85438931  0.99889994]
 [-0.99998528 -1.         -0.99999946  0.99985707  1.        ]
 [-1.         -1.         -1.          0.99999988  1.        ]
 [-1.         -1.         -0.99846929  0.99887669 -0.88744193]]

# Outputs at t = 1: one row per input instance, one column per neuron.
print(Y1_val)

[[-1.         -1.         -1.          0.99997073  1.        ]
 [-0.31234568  0.40358639  0.98742425 -0.99910283  0.92220592]
 [-0.9999997  -1.         -0.99963617  0.95402563  1.        ]
 [-0.98156869 -0.99999994  0.82621282  0.98253846  0.99997246]]

显示计算图

用这种方式构造模型，如果要接收100步甚至更多步的输入，计算图将变得非常臃肿。

# Pass the default graph to the show_graph helper imported from the local
# `show` module (presumably renders it for inspection; confirm in show.py).
show_graph(tf.get_default_graph())

相关参数设定

构造计算图

训练

显示计算图