我创建了一个全零张量,只有 [:, -1, -1, 0] 处的单个条目被设置为 1e10。然后我用一个随机卷积核对该张量做卷积。当批量较小(例如 5)时,输出的 [:, 0, 0, :] 为零(因为该输出位置对应的输入区域全为零);但当批量较大(例如 100)时,这些条目包含明显大于零的数值。这种情况只在 GPU 上出现。
import tensorflow as tf
import numpy as np
def test(sess):
    """Convolve spiked, otherwise-zero inputs and report non-zero corner outputs.

    Builds a stride-1 'VALID' NHWC convolution with a random 5x5x64x64
    kernel, then feeds it two inputs that are zero everywhere except for a
    1e10 value at spatial position (-1, -1), channel 0, of every example:
    one with batch size 5 and one with batch size 100.  With a 5x5 kernel
    and 'VALID' padding, output position (0, 0) only reads input rows and
    columns 0..4, which are all zero, so every value in ``[:, 0, 0, :]`` is
    expected to be zero.  The count of non-zero values there is printed for
    both batches, and the first example's corner values are printed too if
    the large batch produced any.

    Args:
        sess: session used to run the convolution; the ops are added to
            the default graph under whatever device scope is active when
            this function is called.
    """
    kernel_values = np.random.uniform(size=(5, 5, 64, 64)).astype(np.float32)
    inputs = tf.placeholder(dtype=tf.float32, shape=(None, 32, 32, 64))
    kernel = tf.constant(kernel_values)
    conv_out = tf.nn.conv2d(
        inputs, kernel, [1, 1, 1, 1], 'VALID', data_format='NHWC')

    def spiked_zeros(batch_size):
        # All-zero batch with one huge value in the far spatial corner of
        # channel 0 for every example.
        batch = np.zeros((batch_size, 32, 32, 64), dtype=np.float32)
        batch[:, -1, -1, 0] = 1e10
        return batch

    small_batch = spiked_zeros(5)
    large_batch = spiked_zeros(100)

    small_result = sess.run(conv_out, feed_dict={inputs: small_batch})
    large_result = sess.run(conv_out, feed_dict={inputs: large_batch})

    # Only the top-left output position is inspected; its receptive field
    # never overlaps the spike.
    print('Number nonzero (good): ', np.count_nonzero(small_result[:, 0, 0, :]))
    large_nonzero = np.count_nonzero(large_result[:, 0, 0, :])
    print('Number nonzero (bad): ', large_nonzero)
    if large_nonzero > 0:
        print(large_result[0, 0, 0, :])
# Run the same check once per device inside a single session; the ops that
# test() creates are placed according to the enclosing tf.device scope.
with tf.Session() as sess:
    for device_name, banner in (
            ('cpu:0', 'Testing on cpu -- should succeed'),
            ('gpu:0', 'Testing on gpu -- typically fails'),
    ):
        with tf.device(device_name):
            print(banner)
            test(sess)